1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2016 Nexenta Systems, Inc.
  14  */
  15 
  16 #include <sys/aoe.h>
  17 #include <sys/blkdev.h>
  18 #include <sys/conf.h>
  19 #include <sys/debug.h>
  20 #include <sys/devops.h>
  21 #include <sys/dktp/bbh.h>
  22 #include <sys/dktp/cmdk.h>
  23 #include <sys/errno.h>
  24 #include <sys/file.h>
  25 #include <sys/kmem.h>
  26 #include <sys/ksynch.h>
  27 #include <sys/log.h>
  28 #include <sys/mac_client.h>
  29 #include <sys/modctl.h>
  30 #include <sys/modctl.h>
  31 #include <sys/param.h>
  32 #include <sys/pci.h>
  33 #include <sys/stat.h>
  34 #include <sys/stream.h>
  35 #include <sys/stropts.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/strsun.h>
  38 #include <sys/sunndi.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/types.h>
  41 #include <sys/sata/sata_hba.h>
  42 
/*
 * Human-readable descriptions of AoE error conditions.  Entry 0 is the
 * fallback text; NECODES (defined below) counts the remaining entries.
 * NOTE(review): presumably indexed by the error code carried in an AoE
 * response header -- the lookup is not visible in this part of the file.
 */
static char *aoe_errlist[] =
{
        "no such error",
        "unrecognized command code",
        "bad argument parameter",
        "device unavailable",
        "config string present",
        "unsupported version",
        "target is reserved"
};
  53 
/* True when list_t 'a' is empty, i.e. its head node links back to itself. */
#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head)

/* Number of entries in aoe_errlist, not counting the fallback entry 0. */
#define NECODES         (sizeof (aoe_errlist) / sizeof (char *) - 1)
/* Period of the retransmit timer, in clock ticks (one tenth of hz). */
#define TIMERTICK       (hz / 10)
/* Lower clamp, in ticks, applied to ad_rttavg and ad_mintimer. */
#define MINTIMER        (50 * TIMERTICK)
/* Upper clamp, in ticks, applied to ad_rttavg (16 * hz). */
#define MAXTIMER        (hz << 4)
/*
 * Default for aoeblk_maxwait: once a frame has waited longer than this
 * (frm_waited / hz is compared against it) the device is declared failed.
 */
#define MAXWAIT         60 /* MAXWAIT rexmit time, fail device. */
#define DISCTIMER       10 /* Periodic disk discovery timer */

/* Operation codes; their users are not visible in this part of the file. */
#define OP_READ         0
#define OP_WRITE        1
#define OP_FLUSH        2

/* Retransmit wait limit checked by rexmit_timer(); defaults to MAXWAIT. */
int aoeblk_maxwait = MAXWAIT;
/*
 * Non-zero: aoeblk_atawc() asks the target to enable its write cache;
 * zero: it asks the target to disable it.
 */
int aoeblk_wc = 1;
  69 
/*
 * Driver's global variables
 */

/* Short description reported through the modldrv linkage below. */
static char aoeblk_ident[]              = "AOE IO block driver";

/* attach(9E)/detach(9E) entry points, defined later in the file. */
static int aoeblk_attach(dev_info_t *, ddi_attach_cmd_t);
static int aoeblk_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Device operations vector.  Only attach and detach are implemented by
 * this driver; no cb_ops or bus_ops are supplied here.
 */
static struct dev_ops aoeblk_dev_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        ddi_no_info,            /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        aoeblk_attach,          /* attach */
        aoeblk_detach,          /* detach */
        nodev,                  /* reset */
        NULL,                   /* cb_ops */
        NULL,                   /* bus_ops */
        ddi_power,              /* power */
        ddi_quiesce_not_needed  /* quiesce */
};
  92 
/*
 * Standard loadable-module linkage for a device driver: modldrv ties the
 * driver description and dev_ops together, and modlinkage lists the
 * (single) linkage structure of this module.
 */
extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        aoeblk_ident,       /* short description */
        &aoeblk_dev_ops         /* driver specific ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        {
                (void *)&modldrv,
                NULL,           /* terminates the linkage list */
        },
};
 109 
/*
 * Named kstat counters kept per driver instance (reached via
 * aoeblk_softc_t.ks_data).
 */
typedef struct aoeblk_stats {
        /* Not updated in the visible part of the file. */
        struct kstat_named      sts_rw_outofmemory;
        struct kstat_named      sts_rw_badoffset;
        struct kstat_named      sts_error_packets;
        /* Responses whose tag matched no outstanding frame. */
        struct kstat_named      sts_unsolicited_packets;
        /* Responses for an xfer that already received all its replies. */
        struct kstat_named      sts_unsolicited_xfers;
        /* Read responses carrying less data than was requested. */
        struct kstat_named      sts_bad_xfers;
        /* Frames handed to rexmit() by the retransmit timer. */
        struct kstat_named      sts_rexmit_packets;
} aoeblk_stats_t;
 119 
/*
 * Per-instance driver state.
 */
typedef struct aoeblk_softc {
        dev_info_t              *dev;           /* used for logging and bd_attach_handle() */
        aoe_eport_t             *eport;         /* AoE port the disks are reached through */
        aoe_frame_t             *bcast_f[AOE_MAX_MACOBJ]; /* use not visible in this part */
        aoeblk_stats_t          *ks_data;       /* named kstat counters */
        kstat_t                 *sc_intrstat;
        volatile timeout_id_t   discovery_timeout_id;
        kmutex_t                bd_mutex;       /* held while looking up bd_list */
        list_t                  bd_list;        /* list of aoedisk_t */
} aoeblk_softc_t;
 130 
/*
 * Bits of aoedisk_t.ad_flags.  They are updated with atomic_or_32() /
 * atomic_and_32().
 */
#define DEVFL_UP        1       /* device is ready for AoE->ATA commands */
#define DEVFL_OPEN      (1<<1)    /* responses are dropped while this is clear */
#define DEVFL_TKILL     (1<<2)    /* flag for timer to know when to kill self */
#define DEVFL_EXT       (1<<3)    /* device accepts lba48 commands */
#define DEVFL_CLOSEWAIT (1<<4)    /* device is waiting for close to revalidate */
#define DEVFL_ATTWAIT   (1<<5)    /* disk responded to ATA ID, to be attached */
#define DEVFL_WC_UPDATE (1<<6)    /* device needs to update write cache status */
#define DEVFL_DESTROY   (1<<7)    /* use not visible in this part of the file */
#define DEVFL_STATECHG  (1<<8)    /* rexmit_timer() must process a state change */

/*
 * The maximum number of outstanding frames
 */
#define AOEDISK_MAXFRAMES       8192    /* default value */
static int aoedisk_maxframes;
 146 
/*
 * Per-transfer bookkeeping kept inside the bd_xfer_t itself (see
 * xfer_priv() below).
 */
typedef struct xfer_private {
        uint32_t        msgs_cnt;       /* responses received so far */
        uint32_t        msgs_sent;      /* completion target: done when msgs_cnt reaches it */
        uint32_t        err_cnt;        /* responses that reported an ATA error */
} xfer_private_t;

/* View the x_dmac member of bd_xfer_t 'x_v' as an xfer_private_t. */
#define xfer_priv(x_v)  ((xfer_private_t *)&(x_v)->x_dmac)
 154 
/*
 * State of one AoE target (disk).
 */
typedef struct aoedisk {
        list_node_t             node;           /* list linkage (disks are kept on sc->bd_list) */
        list_t                  frames;         /* outstanding aoeblk_frame_t's */
        aoeblk_softc_t          *sc;            /* owning driver instance */
        bd_handle_t             bd_h;           /* blkdev handle, NULL when not allocated */
        dev_info_t              *dev;           /* set by aoeblk_devid_init() */
        void                    *master_mac;    /* MAC that answered ATA identify */
        aoe_eport_t             *eport;         /* port used to allocate and send frames */
        volatile timeout_id_t   rexmit_timeout_id; /* id of pending rexmit_timer() */
        kmutex_t                ad_mutex;       /* protects 'frames' and the timer state */
        uint32_t                ad_flags;       /* DEVFL_* bits */
        uint32_t                ad_status;      /* cleared on a good identify response */
        unsigned long           ad_unit_id;     /* unit id handed to the eport callbacks */
        unsigned short          ad_major;       /* AoE major address */
        unsigned short          ad_minor;       /* AoE minor address */
        unsigned long           ad_unit;        /* lookup key, compared with AOEUNIT(major, minor) */
        unsigned short          ad_lasttag;     /* sequence counter used by newtag() */
        unsigned short          ad_rttavg;      /* moving average round trip, in ticks */
        unsigned short          ad_mintimer;    /* floor applied to round trip samples */
        off_t                   ad_nsectors;    /* sector count from ATA identify */
        unsigned short          ad_nframes;     /* drives queue depth and max transfer size */
        char                    ad_vendor[40];  /* identity strings from ATA identify */
        char                    ad_product[40];
        char                    ad_revision[8];
        char                    ad_serial[20];
} aoedisk_t;
 181 
/*
 * Driver-private part of an AoE frame; converted to and from the
 * aoe_frame_t with FRM2PRIV() / PRIV2FRM().
 */
typedef struct aoeblk_frame {
        list_node_t             frm_node;       /* linkage on aoedisk_t.frames */
        void                    *frm_req;       /* bd_xfer_t being served, or NULL */
        caddr_t                 frm_kaddr;      /* data buffer for this frame */
        uint32_t                frm_klen;       /* length of frm_kaddr in bytes */
        uint32_t                frm_waited;     /* ticks spent waiting across retransmits */
        uint32_t                frm_tag;        /* AoE tag, or INPROCTAG / FREETAG */
        uint32_t                frm_rsvd0;
        char                    frm_hdr[AOEHDRSZ]; /* AoE header followed by ATA header */
} aoeblk_frame_t;
 192 
/* blkdev entry points implemented by this driver. */
static int aoeblk_read(void *arg, bd_xfer_t *xfer);
static int aoeblk_write(void *arg, bd_xfer_t *xfer);
static int aoeblk_flush(void *arg, bd_xfer_t *xfer);
static int aoeblk_reserve(void *arg, bd_xfer_t *xfer);
static void aoeblk_driveinfo(void *arg, bd_drive_t *drive);
static int aoeblk_mediainfo(void *arg, bd_media_t *media);
static int aoeblk_devid_init(void *, dev_info_t *, ddi_devid_t *);

/*
 * Operations vector handed to bd_alloc_handle(); the 'arg' each entry
 * point receives is the aoedisk_t passed to bd_alloc_handle().
 */
static bd_ops_t aoeblk_ops = {
        BD_OPS_VERSION_0,
        aoeblk_driveinfo,
        aoeblk_mediainfo,
        aoeblk_devid_init,
        aoeblk_flush,
        aoeblk_read,
        aoeblk_write,
        aoeblk_reserve,
};
 211 
 212 static void
 213 aoeblk_driveinfo(void *arg, bd_drive_t *drive)
 214 {
 215         aoedisk_t *d = (aoedisk_t *)arg;
 216 
 217         drive->d_removable = B_FALSE;
 218         drive->d_hotpluggable = B_TRUE;
 219         drive->d_target = d->ad_major;
 220         drive->d_lun = d->ad_minor;
 221         drive->d_maxxfer = d->eport->eport_maxxfer;
 222 
 223         if (d->ad_nframes <= 4) {
 224                 drive->d_qsize = 1;
 225                 drive->d_maxxfer *= d->ad_nframes;
 226         } else if (d->ad_nframes <= 8) {
 227                 drive->d_qsize = 2;
 228                 drive->d_maxxfer *= (d->ad_nframes / 2);
 229         } else if (d->ad_nframes <= 16) {
 230                 drive->d_qsize = 4;
 231                 drive->d_maxxfer *= (d->ad_nframes / 4);
 232         } else if (d->ad_nframes <= 1024) {
 233                 drive->d_qsize = 8;
 234                 drive->d_maxxfer *= (d->ad_nframes / 8);
 235         } else {
 236                 drive->d_qsize = 64;
 237                 drive->d_maxxfer *= (d->ad_nframes / 64);
 238         }
 239 
 240         drive->d_vendor = d->ad_vendor;
 241         drive->d_vendor_len = strlen(d->ad_vendor);
 242         drive->d_product = d->ad_product;
 243         drive->d_product_len = strlen(d->ad_product);
 244         drive->d_serial = d->ad_serial;
 245         drive->d_serial_len = strlen(d->ad_serial);
 246         drive->d_revision = d->ad_revision;
 247         drive->d_revision_len = strlen(d->ad_revision);
 248 }
 249 
 250 static int
 251 aoeblk_mediainfo(void *arg, bd_media_t *media)
 252 {
 253         aoedisk_t *d = (aoedisk_t *)arg;
 254 
 255         media->m_nblks = d->ad_nsectors;
 256         media->m_blksize = DEV_BSIZE;
 257         media->m_readonly = 0;
 258 
 259         if (d->ad_flags & DEVFL_CLOSEWAIT)
 260                 return (1);
 261         return (0);
 262 }
 263 
 264 static int
 265 aoeblk_devid_init(void *arg, dev_info_t *dip, ddi_devid_t *devid)
 266 {
 267         aoedisk_t *d = (aoedisk_t *)arg;
 268         char hwid[CMDK_HWIDLEN];
 269         int ret;
 270 
 271         d->dev = dip;
 272 
 273         (void) snprintf(hwid, sizeof (hwid), "%s %s=%s",
 274             d->ad_vendor, d->ad_product, d->ad_serial);
 275 
 276         ret = ddi_devid_init(dip, DEVID_ATA_SERIAL, strlen(hwid), hwid, devid);
 277         if (ret != DDI_SUCCESS) {
 278                 dev_err(dip, CE_WARN,
 279                     "!failed to build devid for the device %d.%d",
 280                     d->ad_major, d->ad_minor);
 281         }
 282 
 283         return (ret);
 284 }
 285 
 286 static aoedisk_t *
 287 aoedisk_lookup_by_unit(aoeblk_softc_t *sc, unsigned long unit)
 288 {
 289         aoedisk_t *d = NULL;
 290 
 291         for (d = list_head(&sc->bd_list); d; d = list_next(&sc->bd_list, d)) {
 292                 if (unit == d->ad_unit)
 293                         break;
 294         }
 295 
 296         return (d);
 297 }
 298 
 299 static aoeblk_frame_t *
 300 findframe(aoedisk_t *d, uint32_t tag)
 301 {
 302         aoeblk_frame_t *f;
 303 
 304         mutex_enter(&d->ad_mutex);
 305         for (f = list_head(&d->frames); f; f = list_next(&d->frames, f)) {
 306                 if (f->frm_tag == tag) {
 307                         mutex_exit(&d->ad_mutex);
 308                         return (f);
 309                 }
 310         }
 311         mutex_exit(&d->ad_mutex);
 312         return (NULL);
 313 }
 314 
 315 static aoeblk_frame_t *
 316 allocframe(aoedisk_t *d, void *mac, aoe_frame_t **af_out, int kmflag)
 317 {
 318         aoe_frame_t *af;
 319         aoeblk_frame_t *f;
 320         aoe_eport_t *eport = d->eport;
 321 
 322         af = eport->eport_alloc_frame(eport, d->ad_unit_id, mac, kmflag);
 323         if (af == NULL) {
 324                 return (NULL);
 325         }
 326         f = FRM2PRIV(af);
 327         af->af_netb = NULL;
 328         if (!eport->eport_alloc_netb(af, AOEHDRSZ, f->frm_hdr, kmflag)) {
 329                 eport->eport_release_frame(af);
 330                 return (NULL);
 331         }
 332         mutex_enter(&d->ad_mutex);
 333         f->frm_tag = (uint32_t)INPROCTAG;
 334         list_insert_tail(&d->frames, f);
 335         mutex_exit(&d->ad_mutex);
 336 
 337         *af_out = af;
 338         return (f);
 339 }
 340 
 341 static void
 342 freeframe(aoedisk_t *d, aoeblk_frame_t *f)
 343 {
 344         aoe_frame_t *af = PRIV2FRM(f);
 345 
 346         mutex_enter(&d->ad_mutex);
 347         ASSERT(f->frm_tag == (uint32_t)INPROCTAG);
 348         f->frm_tag = (uint32_t)FREETAG;
 349         f->frm_req = NULL;
 350         list_remove(&d->frames, f);
 351         mutex_exit(&d->ad_mutex);
 352 
 353         d->eport->eport_release_frame(af);
 354 }
 355 
 356 /*
 357  * Leave the top bit clear so we have tagspace for userland.
 358  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 359  * This driver reserves tag -1 to mean "unused frame."
 360  */
 361 static inline uint32_t
 362 newtag(aoedisk_t *d)
 363 {
 364         register int n;
 365 
 366         n = ddi_get_lbolt() & 0xffff;
 367         n |= (++d->ad_lasttag & 0x7fff) << 16;
 368         return (n);
 369 }
 370 
 371 static int
 372 aoehdr_atainit(aoedisk_t *d, aoe_hdr_t *h)
 373 {
 374         uint32_t host_tag;
 375 
 376         host_tag = newtag(d);
 377 
 378         h->aoeh_type = htons(ETHERTYPE_AOE);
 379         h->aoeh_verfl = AOE_HVER;
 380         h->aoeh_major = htons(d->ad_major);
 381         h->aoeh_minor = d->ad_minor;
 382         h->aoeh_cmd = (unsigned char)AOECMD_ATA;
 383         h->aoeh_tag = htonl(host_tag);
 384 
 385         return ((int)host_tag);
 386 }
 387 
 388 static inline uint16_t
 389 lhget16(uchar_t *p)
 390 {
 391         uint16_t n;
 392 
 393         n = p[1];
 394         n <<= 8;
 395         n |= p[0];
 396         return (n);
 397 }
 398 
 399 static inline uint32_t
 400 lhget32(uchar_t *p)
 401 {
 402         uint32_t n;
 403 
 404         n = lhget16(p+2);
 405         n <<= 16;
 406         n |= lhget16(p);
 407         return (n);
 408 }
 409 
 410 /* How long since we sent this tag? */
 411 static int
 412 tsince(unsigned int tag)
 413 {
 414         int n;
 415 
 416         n = ddi_get_lbolt() & 0xffff;
 417         n -= tag & 0xffff;
 418         if (n < 0)
 419                 n += 1<<16;
 420         return (n);
 421 }
 422 
 423 static void
 424 rexmit(aoedisk_t *d, aoeblk_frame_t *f)
 425 {
 426         aoe_frame_t *af = PRIV2FRM(f);
 427         aoe_hdr_t *h;
 428         aoe_atahdr_t *ah;
 429         uint32_t oldtag;
 430         void *oldnetb;
 431         mblk_t *mp;
 432         aoe_eport_t *eport = d->eport;
 433 
 434         h = (aoe_hdr_t *)f->frm_hdr;
 435         ah = (aoe_atahdr_t *)(h+1);
 436 
 437         if (f->frm_tag == (uint32_t)INPROCTAG ||
 438             f->frm_tag == (uint32_t)FREETAG ||
 439             f->frm_req == NULL) {
 440                 return;
 441         }
 442         oldtag = f->frm_tag;
 443         oldnetb = af->af_netb;
 444         f->frm_tag = (uint32_t)INPROCTAG;
 445 
 446         af->af_netb = NULL;
 447         mp = eport->eport_alloc_netb(af, AOEHDRSZ, f->frm_hdr, KM_SLEEP);
 448         if (mp == NULL) {
 449                 af->af_netb = oldnetb;
 450                 f->frm_tag = oldtag;
 451                 dev_err(d->sc->dev, CE_WARN,
 452                     "Out of memory on rexmit frame hdr tag %x", oldtag);
 453                 return;
 454         }
 455 
 456         if (f->frm_kaddr && ah->aa_aflags & AOEAFL_WRITE) {
 457                 if (!eport->eport_alloc_netb(af, f->frm_klen,
 458                     f->frm_kaddr, KM_SLEEP)) {
 459                         freeb(mp);
 460                         af->af_netb = oldnetb;
 461                         f->frm_tag = oldtag;
 462                         dev_err(d->sc->dev, CE_WARN,
 463                             "Out of memory on rexmit frame tag %x", oldtag);
 464                         return;
 465                 }
 466         }
 467         f->frm_tag = newtag(d);
 468         h->aoeh_tag = htonl(f->frm_tag);
 469 
 470         /*
 471          * Report accident... so that balancer can better estimate
 472          * what MAC needs to be used next time...
 473          */
 474         eport->eport_ctl(eport, af->af_mac,
 475             AOE_CMD_PORT_UNIT_RETRANSMIT, (void *)d->ad_unit_id);
 476 
 477         eport->eport_tx_frame(af);
 478 }
 479 
 480 static void
 481 aoeblk_downdisk(aoedisk_t *d, void *mac, int media_change)
 482 {
 483         int i;
 484 
 485         /* disable timer */
 486         atomic_or_32(&d->ad_flags, DEVFL_TKILL);
 487 
 488         if (media_change && d->ad_flags & DEVFL_UP) {
 489                 aoe_eport_t *eport = d->sc->eport;
 490 
 491                 atomic_and_32(&d->ad_flags, ~DEVFL_UP);
 492                 atomic_or_32(&d->ad_flags, DEVFL_CLOSEWAIT);
 493 
 494                 /*
 495                  * State change will occure in rexmit timer
 496                  */
 497                 atomic_or_32(&d->ad_flags, DEVFL_STATECHG);
 498 
 499                 if (mac != NULL) {
 500                         eport->eport_ctl(eport, mac,
 501                             AOE_CMD_PORT_UNIT_OFFLINE, (void *)d->ad_unit_id);
 502                         return;
 503                 }
 504                 for (i = 0; i < eport->eport_mac_cnt; i++) {
 505                         eport->eport_ctl(eport, eport->eport_mac[i],
 506                             AOE_CMD_PORT_UNIT_OFFLINE, (void *)d->ad_unit_id);
 507                 }
 508         } else
 509                 atomic_and_32(&d->ad_flags, ~DEVFL_UP);
 510 }
 511 
/*
 * Periodic per-disk timer, run via timeout(9F) every TIMERTICK ticks.
 * 'vp' is the aoedisk_t.
 *
 * Under ad_mutex it retransmits every outstanding frame that has waited
 * longer than ~150% of the average round trip, or takes the disk down
 * once a frame has waited more than aoeblk_maxwait seconds.  After
 * dropping the mutex it carries out pending state changes
 * (DEVFL_STATECHG) and attaches the blkdev handle of a newly identified
 * disk (DEVFL_ATTWAIT).
 *
 * The timer is not re-armed when DEVFL_TKILL is set or when the disk
 * has just been taken down.
 */
static void
rexmit_timer(void *vp)
{
        int n, ntx;
        aoedisk_t *d = vp;
        register int timeout_ticks;
        aoeblk_frame_t *f;

        mutex_enter(&d->ad_mutex);

        /*
         * Timeout is always ~150% of the moving average.
         */
        timeout_ticks = d->ad_rttavg;
        timeout_ticks += timeout_ticks >> 1;
        ntx = 0;

        /* Asked to stop: leave without re-arming. */
        if (d->ad_flags & DEVFL_TKILL) {
                mutex_exit(&d->ad_mutex);
                return;
        }
        for (f = list_head(&d->frames); f; f = list_next(&d->frames, f)) {
                if (f->frm_tag != (uint32_t)FREETAG &&
                    f->frm_tag != (uint32_t)INPROCTAG &&
                    tsince(f->frm_tag) > timeout_ticks) {
                        /* Accumulate waiting time and convert to seconds. */
                        n = f->frm_waited += timeout_ticks;
                        n /= hz;
                        if (n > aoeblk_maxwait) {
                                aoe_frame_t *af = PRIV2FRM(f);
                                /* Waited too long.  Device failure. */
                                aoeblk_downdisk(d, af->af_mac, 1);
                                mutex_exit(&d->ad_mutex);
                                dev_err(d->dev, CE_WARN, "device %d.%d is not "
                                    "responding and in-recovery",
                                    d->ad_major, d->ad_minor);
                                return;
                        }
                        ntx++;
                        rexmit(d, f);
                        d->sc->ks_data->sts_rexmit_packets.value.ui64++;
                }
        }
        /* Something was retransmitted: double the average, capped. */
        if (ntx) {
                n = d->ad_rttavg <<= 1;
                if (n > MAXTIMER)
                        d->ad_rttavg = MAXTIMER;
        }

        /* Re-arm only while a timeout id is still recorded. */
        if (d->rexmit_timeout_id)
                d->rexmit_timeout_id = timeout(rexmit_timer, d, TIMERTICK);
        mutex_exit(&d->ad_mutex);

        /*
         * Trigger state change
         *
         * NOTE(review): from here on the code runs without ad_mutex and
         * after the timer has been re-armed.
         */
        if (d->ad_flags & DEVFL_STATECHG) {
                atomic_and_32(&d->ad_flags, ~DEVFL_STATECHG);
                if (d->ad_flags & DEVFL_CLOSEWAIT) {
                        /* Disk went away: let blkdev re-check the media. */
                        if (d->bd_h)
                                bd_state_change(d->bd_h);
                } else {
                        /* Disk came back: drop the handle and re-attach. */
                        if (d->bd_h) {
                                (void) bd_detach_handle(d->bd_h);
                                bd_free_handle(d->bd_h);
                                d->bd_h = NULL;
                        }
                        atomic_or_32(&d->ad_flags, DEVFL_ATTWAIT);
                }
        }

        if (d->ad_flags & DEVFL_ATTWAIT) {
                int ret;

                atomic_and_32(&d->ad_flags, ~DEVFL_ATTWAIT);

                /*
                 * NOTE(review): KM_SLEEP is requested from a timeout(9F)
                 * callback -- confirm this context is allowed to block.
                 */
                d->bd_h = bd_alloc_handle(d, &aoeblk_ops, NULL, KM_SLEEP);
                if (d->bd_h == NULL) {
                        dev_err(d->dev, CE_WARN, "failed to allocate blkdev");
                        return;
                }

                ret = bd_attach_handle(d->sc->dev, d->bd_h);
                if (ret != DDI_SUCCESS) {
                        bd_free_handle(d->bd_h);
                        d->bd_h = NULL;
                        dev_err(d->sc->dev, CE_WARN, "failed to attach blkdev");
                        return;
                }

                /* Attached: mark the disk up and report it online. */
                atomic_or_32(&d->ad_flags, DEVFL_UP);
                d->eport->eport_ctl(d->eport, d->master_mac,
                    AOE_CMD_PORT_UNIT_ONLINE, (void *)d->ad_unit_id);
        }
}
 606 
 607 static void
 608 calc_rttavg(aoedisk_t *d, unsigned int rtt)
 609 {
 610         register long n;
 611 
 612         n = rtt;
 613         if (n < 0) {
 614                 n = -((long)rtt);
 615                 if (n < MINTIMER)
 616                         n = MINTIMER;
 617                 else if (n > MAXTIMER)
 618                         n = MAXTIMER;
 619                 d->ad_mintimer += (n - d->ad_mintimer) >> 1;
 620         } else if (n < d->ad_mintimer) {
 621                 n = d->ad_mintimer;
 622         } else if (n > MAXTIMER) {
 623                 n = MAXTIMER;
 624         }
 625 
 626         n -= d->ad_rttavg;
 627         d->ad_rttavg += n >> 2;
 628 
 629         /*
 630          * We do not want to constantly retransmit if target
 631          * is busy... bad for performance. So, give target
 632          * more time would be reasonable
 633          */
 634         if (d->ad_rttavg < MINTIMER)
 635                 d->ad_rttavg = MINTIMER;
 636 }
 637 
 638 static void
 639 aoeblk_atawc(aoedisk_t *d, void *mac)
 640 {
 641         aoe_hdr_t *h;
 642         aoe_atahdr_t *ah;
 643         aoe_frame_t *af;
 644         aoeblk_frame_t *f;
 645         aoe_eport_t *eport = d->eport;
 646 
 647         f = allocframe(d, mac, &af, KM_NOSLEEP);
 648         if (f == NULL) {
 649                 return;
 650         }
 651         h = (aoe_hdr_t *)f->frm_hdr;
 652         ah = (aoe_atahdr_t *)(h+1);
 653 
 654         /* Initialize the headers & frame. */
 655         f->frm_tag = (uint32_t)aoehdr_atainit(d, h);
 656         f->frm_waited = 0;
 657 
 658         /* Set up ata header. */
 659         ah->aa_cmdstat = (unsigned char)ATA_SETFEATURES;
 660         ah->aa_errfeat = aoeblk_wc ? ATA_SF_ENAB_WCACHE : ATA_SF_DIS_WCACHE;
 661         ah->aa_lba3 = 0xa0;
 662 
 663         f->frm_req = NULL;
 664         eport->eport_tx_frame(af);
 665 
 666         atomic_and_32(&d->ad_flags, ~DEVFL_WC_UPDATE);
 667 }
 668 
 669 static void
 670 aoeblk_unwind(aoedisk_t *d)
 671 {
 672         aoeblk_frame_t *f;
 673 
 674         for (f = list_head(&d->frames); f; f = list_next(&d->frames, f))
 675                 f->frm_waited = 0;
 676 
 677         atomic_or_32(&d->ad_flags, DEVFL_UP);
 678         atomic_and_32(&d->ad_flags, ~DEVFL_CLOSEWAIT);
 679 
 680         /*
 681          * State change will occure in rexmit timer
 682          */
 683         atomic_or_32(&d->ad_flags, DEVFL_STATECHG);
 684 
 685         /* Re-enable rexmit timer */
 686         atomic_and_32(&d->ad_flags, ~DEVFL_TKILL);
 687         if (d->rexmit_timeout_id)
 688                 d->rexmit_timeout_id = timeout(rexmit_timer, d, TIMERTICK);
 689 }
 690 
 691 static void
 692 aoeblk_trim_inqstr(char *buf, int len)
 693 {
 694         boolean_t valid = B_FALSE;
 695         char *p = buf;
 696         int i, tb = 0;
 697 
 698         /*
 699          * Valid model/serial string must contain non-zero non-space.
 700          * Trim trailing spaces/NULL.
 701          */
 702         for (i = 0; i < len; i++) {
 703                 char c = *p++;
 704                 if (c != ' ' && c != '\0') {
 705                         tb = i + 1;
 706                         if (c != '0')
 707                                 valid = B_TRUE;
 708                 }
 709         }
 710 
 711         if (!valid)
 712                 tb = 0;
 713         buf[tb] = '\0';
 714 }
 715 
 716 static void
 717 aoeblk_ataid_rsp(aoedisk_t *d, aoe_frame_t *fin, char *id)
 718 {
 719         char model[40];
 720         char *vendor, *product;
 721         int n;
 722 
 723         swab(id + 54, model, sizeof (model));
 724         aoeblk_trim_inqstr(model, sizeof (model));
 725         swab(id + 46, d->ad_revision, sizeof (d->ad_revision));
 726         aoeblk_trim_inqstr(d->ad_revision, sizeof (d->ad_revision));
 727         swab(id + 20, d->ad_serial, sizeof (d->ad_serial));
 728         aoeblk_trim_inqstr(d->ad_serial, sizeof (d->ad_serial));
 729 
 730         /* Try parsing the model into vendor/product */
 731         sata_split_model(model, &vendor, &product);
 732         if (vendor == NULL)
 733                 (void) strlcpy(d->ad_vendor, "AoE", sizeof (d->ad_vendor));
 734         else
 735                 (void) strlcpy(d->ad_vendor, vendor, sizeof (d->ad_vendor));
 736         (void) strlcpy(d->ad_product, product, sizeof (d->ad_product));
 737 
 738         n = lhget16((uchar_t *)(id + (83<<1)));   /* Command set supported. */
 739         if (n & (1<<10)) {                    /* LBA48 */
 740                 atomic_or_32(&d->ad_flags, DEVFL_EXT);
 741                 /* Number of LBA48 sectors */
 742                 d->ad_nsectors = lhget32((uchar_t *)(id + (100<<1)));
 743         } else {
 744                 atomic_and_32(&d->ad_flags, ~DEVFL_EXT);
 745                 /* Number of LBA28 sectors */
 746                 d->ad_nsectors = lhget32((uchar_t *)(id + (60<<1)));
 747         }
 748 
 749         mutex_enter(&d->ad_mutex);
 750 
 751         /*
 752          * Use periodic discovery timer as iSCSI NOOP-style
 753          * ping mechanism. We received good response for this
 754          * unit path. Log it...
 755          */
 756         d->ad_status = 0;
 757 
 758         if (d->ad_flags & DEVFL_CLOSEWAIT) {
 759                 aoeblk_unwind(d);
 760                 mutex_exit(&d->ad_mutex);
 761                 dev_err(d->dev, CE_WARN,
 762                     "device %d.%d recovered and now online",
 763                     d->ad_major, d->ad_minor);
 764                 return;
 765         }
 766         mutex_exit(&d->ad_mutex);
 767 
 768         /*
 769          * Only one "master" MAC object exists per target
 770          * and it is the one which responded to ATA identify.
 771          */
 772         d->master_mac = fin->af_mac;
 773         atomic_or_32(&d->ad_flags, DEVFL_ATTWAIT);
 774 }
 775 
 776 static void
 777 aoeblk_ata_rsp(aoeblk_softc_t *sc, aoe_frame_t *fin)
 778 {
 779         aoedisk_t *d;
 780         aoe_hdr_t *hin = (aoe_hdr_t *)fin->af_data, *hout;
 781         aoe_atahdr_t *ahin, *ahout;
 782         unsigned long unit;
 783         register int tag;
 784         aoeblk_frame_t *fout;
 785         bd_xfer_t *xfer;
 786         xfer_private_t *x_priv = NULL;
 787 
 788         unit = AOEUNIT(ntohs(hin->aoeh_major), hin->aoeh_minor);
 789 
 790         mutex_enter(&sc->bd_mutex);
 791         d = aoedisk_lookup_by_unit(sc, unit);
 792         if (d == NULL) {
 793                 mutex_exit(&sc->bd_mutex);
 794                 dev_err(sc->dev, CE_WARN, "response from unknown device %d.%d",
 795                     ntohs(hin->aoeh_major), hin->aoeh_minor);
 796                 return;
 797         }
 798         if (!(d->ad_flags & DEVFL_OPEN)) {
 799                 mutex_exit(&sc->bd_mutex);
 800                 return;
 801         }
 802 
 803         /*
 804          * Target will copy original tag into reply message.
 805          * We use that (obviously!) to find the right outstanding frame
 806          */
 807         tag = ntohl(hin->aoeh_tag);
 808         fout = findframe(d, tag);
 809         if (fout == NULL) {
 810                 mutex_exit(&sc->bd_mutex);
 811                 calc_rttavg(d, -tsince(tag));
 812                 sc->ks_data->sts_unsolicited_packets.value.ui64++;
 813                 return;
 814         }
 815 
 816         hout = (aoe_hdr_t *)fout->frm_hdr;
 817         ahout = (aoe_atahdr_t *)(hout+1);
 818 
 819         xfer = fout->frm_req;
 820         if (xfer) {
 821                 x_priv = xfer_priv(xfer);
 822                 int n = ahout->aa_scnt << DEV_BSHIFT;
 823 
 824                 if ((ahout->aa_aflags & AOEAFL_WRITE) == 0 &&
 825                     MBLKL(FRM2MBLK(fin)) - AOEHDRSZ < n) {
 826                         mutex_exit(&sc->bd_mutex);
 827                         sc->ks_data->sts_bad_xfers.value.ui64++;
 828                         return;
 829                 }
 830 
 831                 if (x_priv->msgs_cnt >= x_priv->msgs_sent) {
 832                         mutex_exit(&sc->bd_mutex);
 833                         sc->ks_data->sts_unsolicited_xfers.value.ui64++;
 834                         return;
 835                 }
 836 
 837                 atomic_inc_32(&x_priv->msgs_cnt);
 838         }
 839 
 840         fout->frm_tag = (uint32_t)INPROCTAG;
 841         calc_rttavg(d, tsince(tag));
 842 
 843         ahin = (aoe_atahdr_t *)(hin+1);
 844         if (ahin->aa_cmdstat & 0xa9) {   /* These bits cleared on success. */
 845                 dev_err(d->dev, CE_WARN, "ATA error: cmd %x stat %x tag %x",
 846                     ahout->aa_cmdstat, ahin->aa_cmdstat, tag);
 847                 switch (ahout->aa_cmdstat) {
 848                 case ATA_READ:
 849                 case ATA_READ48:
 850                 case ATA_WRITE:
 851                 case ATA_WRITE48:
 852                 case ATA_FLUSHCACHE:
 853                 case ATA_FLUSHCACHE48:
 854                         atomic_inc_32(&x_priv->err_cnt);
 855                         break;
 856                 case ATA_SETFEATURES:
 857                         break;
 858                 default:
 859                         ;
 860                 }
 861         } else {
 862                 switch (ahout->aa_cmdstat) {
 863                 case ATA_READ:
 864                 case ATA_READ48:
 865                         bcopy(((char *)hin) + AOEHDRSZ, fout->frm_kaddr,
 866                             fout->frm_klen);
 867                         break;
 868                 case ATA_WRITE:
 869                 case ATA_WRITE48:
 870                         break;
 871                 case ATA_SETFEATURES:
 872                         if (ahin->aa_errfeat & (1<<2)) {
 873                                 dev_err(sc->dev, CE_WARN,
 874                                     "setfeatures failure for device %d.%d",
 875                                     ntohs(hin->aoeh_major), hin->aoeh_minor);
 876                         }
 877                         break;
 878                 case ATA_ATA_IDENTIFY:
 879                         if (MBLKL(FRM2MBLK(fin)) - AOEHDRSZ < DEV_BSIZE) {
 880                                 dev_err(sc->dev, CE_WARN,
 881                                     "ATA IDENTIFY failure for device %d.%d",
 882                                     ntohs(hin->aoeh_major), hin->aoeh_minor);
 883                                 break;
 884                         }
 885                         aoeblk_ataid_rsp(d, fin, (char *)(ahin+1));
 886                         atomic_or_32(&d->ad_flags, DEVFL_WC_UPDATE);
 887                         break;
 888 #if 0
 889                 case ATA_SMART:
 890                         /* n = m->m_len; */
 891                         n = m->m_pkthdr.len;
 892                         if (n > sizeof (f->f_hdr))
 893                                 n = sizeof (f->f_hdr);
 894                         (void *) memcpy(f->f_hdr, hin, n);
 895                         f->f_tag = INPROCTAG;
 896                         wakeup(d);
 897                         mtx_unlock(&d->ad_mtx);
 898                         return;
 899 #endif
 900                 case ATA_FLUSHCACHE:
 901                 case ATA_FLUSHCACHE48:
 902                         break;
 903                 default:
 904                         dev_err(sc->dev, CE_WARN,
 905                             "unrecognized ATA command %xh from %d.%d",
 906                             ahout->aa_cmdstat, ntohs(hin->aoeh_major),
 907                             hin->aoeh_minor);
 908                 }
 909         }
 910 
 911         if (xfer && x_priv->msgs_cnt == x_priv->msgs_sent) {
 912                 bd_xfer_done(xfer, x_priv->err_cnt ? EIO : 0);
 913         }
 914         mutex_exit(&sc->bd_mutex);
 915         freeframe(d, fout);
 916 
 917         if (d->ad_flags & DEVFL_WC_UPDATE)
 918                 aoeblk_atawc(d, fin->af_mac);
 919 }
 920 
 921 static void
 922 aoeblk_ataid(aoedisk_t *d, void *mac)
 923 {
 924         aoe_hdr_t *h;
 925         aoe_atahdr_t *ah;
 926         aoeblk_frame_t *f;
 927         aoe_frame_t *af;
 928         aoe_eport_t *eport = d->eport;
 929 
 930         f = allocframe(d, mac, &af, KM_NOSLEEP);
 931         if (f == NULL) {
 932                 return;
 933         }
 934         h = (aoe_hdr_t *)f->frm_hdr;
 935         ah = (aoe_atahdr_t *)(h+1);
 936 
 937         /* Initialize the headers & frame. */
 938         f->frm_tag = (uint32_t)aoehdr_atainit(d, h);
 939         f->frm_waited = 0;
 940 
 941         /* This message initializes the device, so we reset the rttavg. */
 942         d->ad_rttavg = MAXTIMER;
 943 
 944         /* Set up ata header. */
 945         ah->aa_scnt = 1;
 946         ah->aa_cmdstat = ATA_ATA_IDENTIFY;
 947         ah->aa_lba3 = 0xa0;
 948 
 949         f->frm_req = NULL;
 950         eport->eport_tx_frame(af);
 951 }
 952 
 953 static void
 954 aoeblk_cfg_rsp(aoeblk_softc_t *sc, aoe_frame_t *fin)
 955 {
 956         aoe_hdr_t *h = (aoe_hdr_t *)fin->af_data;
 957         aoe_cfghdr_t *ch = (aoe_cfghdr_t *)(h+1);
 958         unsigned long unit;
 959         aoedisk_t *d;
 960 
 961         unit = AOEUNIT(ntohs(h->aoeh_major), h->aoeh_minor);
 962 
 963         mutex_enter(&sc->bd_mutex);
 964         d = aoedisk_lookup_by_unit(sc, unit);
 965         if (d && d->ad_flags & DEVFL_OPEN) {
 966 
 967                 if (d->ad_flags & DEVFL_UP) {
 968 
 969                         mutex_exit(&sc->bd_mutex);
 970 
 971                         /*
 972                          * If this is matching shelf.slot response from
 973                          * the new MAC, record it as a new data path
 974                          */
 975                         (void) d->eport->eport_report_unit(d->eport,
 976                             fin->af_mac, d->ad_unit, (char *)h->aoeh_src);
 977 
 978                         (void) d->eport->eport_ctl(d->eport, fin->af_mac,
 979                             AOE_CMD_PORT_UNIT_ONLINE, (void *)d->ad_unit_id);
 980 
 981                         d->ad_status = 0;
 982                         return;
 983                 }
 984 
 985                 /*
 986                  * Target is still open and we just received healthy response.
 987                  * We will attempt to re-initialize.
 988                  */
 989                 if (d->ad_flags & DEVFL_CLOSEWAIT) {
 990                         /*
 991                          * Update number of max. outstanding frames
 992                          */
 993                         d->ad_nframes = ntohs(ch->ac_bufcnt);
 994                         if (d->ad_nframes > (unsigned short) aoedisk_maxframes)
 995                                 d->ad_nframes =
 996                                     (unsigned short) aoedisk_maxframes;
 997 
 998                         /*
 999                          * Update target address
1000                          */
1001                         (void) d->eport->eport_report_unit(d->eport,
1002                             fin->af_mac, d->ad_unit, (char *)h->aoeh_src);
1003 
1004                         mutex_exit(&sc->bd_mutex);
1005 
1006                         d->ad_status = 0;
1007                         goto send_ataid;
1008                 }
1009 
1010                 /*
1011                  * Skip otherwise...
1012                  */
1013                 mutex_exit(&sc->bd_mutex);
1014                 return;
1015         }
1016         mutex_exit(&sc->bd_mutex);
1017 
1018         /*
1019          * Allocate new AoE device aoedisk
1020          */
1021         d = kmem_zalloc(sizeof (aoedisk_t), KM_SLEEP);
1022         if (d == NULL) {
1023                 dev_err(sc->dev, CE_WARN,
1024                     "unable create new device %lx", unit);
1025                 return;
1026         }
1027         list_create(&d->frames, sizeof (aoeblk_frame_t),
1028             offsetof(aoeblk_frame_t, frm_node));
1029         mutex_init(&d->ad_mutex, NULL, MUTEX_DRIVER, NULL);
1030         d->eport = sc->eport;
1031         d->sc = sc;
1032         d->dev = sc->dev;
1033         d->ad_nframes = ntohs(ch->ac_bufcnt);
1034         if (d->ad_nframes > (unsigned short) aoedisk_maxframes)
1035                 d->ad_nframes = (unsigned short) aoedisk_maxframes;
1036         d->ad_unit = unit;
1037         d->ad_major = (unsigned short)AOEMAJOR(unit);
1038         d->ad_minor = (unsigned short)AOEMINOR(unit);
1039 
1040         /*
1041          * Report new unit
1042          */
1043         d->ad_unit_id = d->eport->eport_report_unit(d->eport, fin->af_mac,
1044             d->ad_unit, (char *)h->aoeh_src);
1045 
1046         mutex_enter(&sc->bd_mutex);
1047         list_insert_tail(&sc->bd_list, d);
1048         mutex_exit(&sc->bd_mutex);
1049 
1050         atomic_or_32(&d->ad_flags, DEVFL_OPEN);
1051         atomic_and_32(&d->ad_flags, ~DEVFL_TKILL);
1052         d->rexmit_timeout_id = timeout(rexmit_timer, d, TIMERTICK);
1053 
1054 send_ataid:
1055         /*
1056          * Get nsectors, id, etc.. and initialize device
1057          * on successful completion
1058          */
1059         aoeblk_ataid(d, fin->af_mac);
1060 }
1061 
1062 static aoe_frame_t *
1063 aoeblk_cfg(aoe_eport_t *eport, int mac_id, unsigned short aoemajor,
1064     unsigned short aoeminor)
1065 {
1066         aoeblk_softc_t *sc = eport->eport_client_private;
1067         aoe_hdr_t *h;
1068         aoedisk_t *d;
1069         aoe_frame_t *af;
1070         aoeblk_frame_t *f;
1071 
1072         af = eport->eport_alloc_frame(eport, 0, eport->eport_mac[mac_id],
1073             KM_SLEEP);
1074         if (af == NULL)
1075                 return (NULL);
1076         f = FRM2PRIV(af);
1077         af->af_netb = NULL;
1078         if (!eport->eport_alloc_netb(af, sizeof (*h) +
1079             sizeof (aoe_cfghdr_t), f->frm_hdr, KM_SLEEP)) {
1080                 eport->eport_release_frame(af);
1081                 return (NULL);
1082         }
1083         h = (aoe_hdr_t *)f->frm_hdr;
1084 
1085         (void *) memset((void *)h->aoeh_dst, 0xff, sizeof (h->aoeh_dst));
1086         h->aoeh_type = htons(ETHERTYPE_AOE);
1087         h->aoeh_verfl = AOE_HVER;
1088         h->aoeh_major = htons(aoemajor);
1089         h->aoeh_minor = (unsigned char)aoeminor;
1090         h->aoeh_cmd = (unsigned char)AOECMD_CFG;
1091         f->frm_tag = (ddi_get_lbolt() & 0xffff) | (1 << 16);
1092         h->aoeh_tag = htonl(f->frm_tag);
1093 
1094         ASSERT(MUTEX_HELD(&sc->bd_mutex));
1095         for (d = list_head(&sc->bd_list); d; d = list_next(&sc->bd_list, d)) {
1096                 d->ad_status++;
1097         }
1098 
1099         eport->eport_tx_frame(af);
1100 
1101         return (af);
1102 }
1103 
1104 static inline void
1105 put_lba(aoe_atahdr_t *ah, diskaddr_t lba)
1106 {
1107         ah->aa_lba0 = (unsigned char)lba;
1108         ah->aa_lba1 = (unsigned char)(lba >>= 8);
1109         ah->aa_lba2 = (unsigned char)(lba >>= 8);
1110         ah->aa_lba3 = (unsigned char)(lba >>= 8);
1111         ah->aa_lba4 = (unsigned char)(lba >>= 8);
1112         ah->aa_lba5 = (unsigned char)(lba >>= 8);
1113 }
1114 
1115 static int
1116 aoeblk_ata_rw(aoedisk_t *d, bd_xfer_t *xfer, int op)
1117 {
1118         aoe_hdr_t *h;
1119         aoe_atahdr_t *ah;
1120         aoe_frame_t *af;
1121         aoeblk_frame_t *f;
1122         char extbit, cmd;
1123         int frags, kmflag;
1124         unsigned int msgs, i;
1125         xfer_private_t *x_priv = xfer_priv(xfer);
1126         aoe_eport_t *eport = d->eport;
1127 
1128         frags = eport->eport_maxxfer >> 9;
1129 
1130         if ((xfer->x_blkno + xfer->x_nblks) > d->ad_nsectors) {
1131                 d->sc->ks_data->sts_rw_badoffset.value.ui64++;
1132                 return (EINVAL);
1133         }
1134 
1135         bzero(x_priv, sizeof (*x_priv));
1136 
1137         kmflag = xfer->x_flags & BD_XFER_POLL ? KM_NOSLEEP : KM_SLEEP;
1138 
1139         /*
1140          * Number of IO messages for this request
1141          */
1142         msgs = (xfer->x_nblks + (frags - 1)) / frags;
1143         msgs = (msgs == 0 && op == OP_FLUSH) ? 1 : msgs;
1144         x_priv->msgs_sent = msgs;
1145         for (i = 0; i < msgs; i++) {
1146                 int blkno, nblks;
1147                 caddr_t kaddr;
1148 
1149                 kaddr = xfer->x_kaddr + i * frags * DEV_BSIZE;
1150                 nblks = frags;
1151                 if (i == msgs - 1) {
1152                         /*
1153                          * Adjust balance for the last one
1154                          */
1155                         nblks = frags - (msgs * frags - xfer->x_nblks);
1156                 }
1157                 blkno = xfer->x_blkno + i * frags;
1158 
1159                 /* Initialize the headers & frame. */
1160                 f = allocframe(d, NULL, &af, kmflag);
1161                 if (f == NULL) {
1162                         d->sc->ks_data->sts_rw_outofmemory.value.ui64++;
1163                         return (ENOMEM);
1164                 }
1165                 h = (aoe_hdr_t *)f->frm_hdr;
1166                 ah = (aoe_atahdr_t *)(h+1);
1167 
1168                 f->frm_tag = (uint32_t)aoehdr_atainit(d, h);
1169                 f->frm_kaddr = kaddr;
1170                 f->frm_klen = DEV_BSIZE * nblks;
1171                 f->frm_waited = 0;
1172 
1173                 /* Set up ata header. */
1174                 ah->aa_scnt = (unsigned char)nblks;
1175                 put_lba(ah, blkno);
1176                 if (d->ad_flags & DEVFL_EXT) {
1177                         ah->aa_aflags |= AOEAFL_EXT;
1178                         extbit = 0x4;
1179                 } else {
1180                         extbit = 0;
1181                         ah->aa_lba3 &= 0x0f;
1182                         ah->aa_lba3 |= 0xe0; /* LBA bit + obsolete 0xa0. */
1183                 }
1184                 if (op == OP_READ) {
1185                         cmd = (char)ATA_READ;
1186                 } else if (op == OP_WRITE) {
1187                         cmd = ATA_WRITE;
1188                         ah->aa_aflags |= AOEAFL_WRITE;
1189                         if (!eport->eport_alloc_netb(af, f->frm_klen,
1190                             f->frm_kaddr, kmflag)) {
1191                                 freeframe(d, f);
1192                                 d->sc->ks_data->sts_rw_outofmemory.value.ui64++;
1193                                 return (ENOMEM);
1194                         }
1195                 } else { /* OP_FLUSH */
1196                         cmd = (char)ATA_FLUSHCACHE;
1197                         f->frm_kaddr = NULL;
1198                         f->frm_klen = 0;
1199                 }
1200                 ah->aa_cmdstat = cmd | extbit;
1201                 f->frm_req = xfer;
1202                 eport->eport_tx_frame(af);
1203         }
1204 
1205         return (DDI_SUCCESS);
1206 }
1207 
1208 static int
1209 aoeblk_read(void *arg, bd_xfer_t *xfer)
1210 {
1211         return (aoeblk_ata_rw(arg, xfer, OP_READ));
1212 }
1213 
1214 static int
1215 aoeblk_write(void *arg, bd_xfer_t *xfer)
1216 {
1217         return (aoeblk_ata_rw(arg, xfer, OP_WRITE));
1218 }
1219 
1220 static int
1221 aoeblk_flush(void *arg, bd_xfer_t *xfer)
1222 {
1223         return (aoeblk_ata_rw(arg, xfer, OP_FLUSH));
1224 }
1225 
1226 static void
1227 aoeblk_rsv_rsp(aoeblk_softc_t *sc, aoe_frame_t *fin)
1228 {
1229         aoedisk_t *d;
1230         aoe_hdr_t *hin = (aoe_hdr_t *)fin->af_data;
1231         unsigned long unit;
1232         register int tag;
1233         aoeblk_frame_t *fout;
1234         int rc = 0;
1235 
1236         unit = AOEUNIT(ntohs(hin->aoeh_major), hin->aoeh_minor);
1237 
1238         mutex_enter(&sc->bd_mutex);
1239         d = aoedisk_lookup_by_unit(sc, unit);
1240         if (d == NULL) {
1241                 mutex_exit(&sc->bd_mutex);
1242                 return;
1243         }
1244         if (!(d->ad_flags & DEVFL_OPEN)) {
1245                 mutex_exit(&sc->bd_mutex);
1246                 return;
1247         }
1248 
1249         tag = ntohl(hin->aoeh_tag);
1250         fout = findframe(d, tag);
1251         if (fout == NULL) {
1252                 mutex_exit(&sc->bd_mutex);
1253                 sc->ks_data->sts_unsolicited_packets.value.ui64++;
1254                 return;
1255         }
1256 
1257         if ((hin->aoeh_verfl & AOEFL_ERR) && (hin->aoeh_err == 6))
1258                 rc = 1; /* target is reserved */
1259         else
1260                 rc = 0; /* target is not reserved */
1261 
1262         fout->frm_tag = (uint32_t)INPROCTAG;
1263         bd_xfer_done(fout->frm_req, rc);
1264         mutex_exit(&sc->bd_mutex);
1265 
1266         mutex_enter(&d->ad_mutex);
1267         ASSERT(fout->frm_tag == (uint32_t)INPROCTAG);
1268         fout->frm_tag = (uint32_t)FREETAG;
1269         fout->frm_req = NULL;
1270         aoe_frame_t *af = PRIV2FRM(fout);
1271         list_remove(&d->frames, fout);
1272         d->eport->eport_release_frame(af);
1273         mutex_exit(&d->ad_mutex);
1274 }
1275 
1276 static int
1277 aoeblk_reserve(void *arg, bd_xfer_t *xfer)
1278 {
1279         aoedisk_t *d = (aoedisk_t *)arg;
1280         aoe_hdr_t *h;
1281         aoe_rsvhdr_t *rh;
1282         aoe_frame_t *af;
1283         aoeblk_frame_t *f;
1284         aoe_eport_t *eport = d->eport;
1285         aoeblk_softc_t *sc;
1286         uint8_t *addr;
1287         int hlen, rhlen, i;
1288         uint32_t free_mem = eport->cache_unit_size;
1289 
1290         if (xfer->x_flags & BD_XFER_POLL)
1291                 return (EIO);
1292 
1293         sc = eport->eport_client_private;
1294         af = eport->eport_alloc_frame(eport, d->ad_unit_id, NULL, KM_SLEEP);
1295         if (!af)
1296                 return (NULL);
1297         f = FRM2PRIV(af);
1298         hlen = sizeof (*h);
1299 
1300         if (xfer->x_flags & BD_XFER_MHD_TKOWN ||
1301             xfer->x_flags & BD_XFER_MHD_QRESERVE)
1302                 rhlen =  sizeof (*rh) +
1303                     (eport->eport_mac_cnt * sizeof (ether_addr_t));
1304         else if (xfer->x_flags & BD_XFER_MHD_RELEASE)
1305                 rhlen =  sizeof (*rh);
1306         else if (xfer->x_flags & BD_XFER_MHD_STATUS)
1307                 rhlen =  sizeof (*rh);
1308         else {
1309                 eport->eport_release_frame(af);
1310                 return (NULL);
1311         }
1312         af->af_netb = NULL;
1313         if (!eport->eport_alloc_netb(af, hlen + rhlen, f->frm_hdr, KM_SLEEP)) {
1314                 eport->eport_release_frame(af);
1315                 return (NULL);
1316         }
1317         free_mem -= hlen + rhlen;
1318 
1319         h = (aoe_hdr_t *)f->frm_hdr;
1320         f->frm_tag = newtag(d);
1321         f->frm_req = xfer;
1322         f->frm_kaddr = xfer->x_kaddr;
1323         f->frm_klen = 0;
1324         f->frm_waited = 0;
1325 
1326         h->aoeh_type = htons(ETHERTYPE_AOE);
1327         h->aoeh_verfl = AOE_HVER;
1328         h->aoeh_major = htons(d->ad_major);
1329         h->aoeh_minor = d->ad_minor;
1330         h->aoeh_cmd = (unsigned char)AOECMD_RSV;
1331         h->aoeh_tag = htonl(f->frm_tag);
1332 
1333         rh = (aoe_rsvhdr_t *)(h+1);
1334         if (xfer->x_flags & BD_XFER_MHD_TKOWN ||
1335             xfer->x_flags & BD_XFER_MHD_QRESERVE) {
1336                 rh->al_rcmd = AOE_RCMD_SET_LIST;
1337                 rh->al_nmacs = sc->eport->eport_mac_cnt;
1338                 for (i = 0; i < eport->eport_mac_cnt; i++) {
1339                         if (free_mem < sizeof (ether_addr_t))
1340                                 goto buffer_overflow;
1341                         addr = eport->eport_get_mac_addr(eport->eport_mac[i]);
1342                         bcopy(addr, rh->al_addr[i], sizeof (ether_addr_t));
1343                 }
1344         } else if (xfer->x_flags & BD_XFER_MHD_RELEASE) {
1345                 rh->al_rcmd = AOE_RCMD_SET_LIST;
1346                 rh->al_nmacs = 0;
1347         } else if (xfer->x_flags & BD_XFER_MHD_STATUS) {
1348                 rh->al_rcmd = AOE_RCMD_READ_LIST;
1349                 rh->al_nmacs = 0;
1350         }
1351 
1352         f->frm_req = xfer;
1353 
1354         mutex_enter(&d->ad_mutex);
1355         list_insert_tail(&d->frames, f);
1356         mutex_exit(&d->ad_mutex);
1357 
1358         eport->eport_tx_frame(af);
1359 
1360         return (DDI_SUCCESS);
1361 
1362 buffer_overflow:
1363         eport->eport_free_netb(af->af_netb);
1364         eport->eport_release_frame(af);
1365         return (DDI_FAILURE);
1366 }
1367 
1368 static int
1369 aoeblk_ksupdate(kstat_t *ksp, int rw)
1370 {
1371         aoeblk_stats_t *ks = ksp->ks_data;
1372         aoeblk_softc_t *sc = ksp->ks_private;
1373         aoeblk_stats_t *ns = sc->sc_intrstat->ks_data;
1374 
1375         if (rw != KSTAT_READ)
1376                 return (EACCES);
1377 
1378         ks->sts_rw_outofmemory = ns->sts_rw_outofmemory;
1379         ks->sts_rw_badoffset = ns->sts_rw_badoffset;
1380         ks->sts_error_packets = ns->sts_error_packets;
1381         ks->sts_unsolicited_packets = ns->sts_unsolicited_packets;
1382         ks->sts_unsolicited_xfers = ns->sts_unsolicited_xfers;
1383         ks->sts_bad_xfers = ns->sts_bad_xfers;
1384         ks->sts_rexmit_packets = ns->sts_rexmit_packets;
1385 
1386         return (0);
1387 }
1388 
1389 static void
1390 aoeblk_rx_frame(aoe_frame_t *fin)
1391 {
1392         aoe_hdr_t *h = (aoe_hdr_t *)fin->af_data;
1393         aoeblk_softc_t *sc = fin->af_eport->eport_client_private;
1394         uint32_t n;
1395 
1396         n = ntohl(h->aoeh_tag);
1397         if ((h->aoeh_verfl & AOEFL_RSP) == 0 || (n & ((uint32_t)1)<<31)) {
1398                 char bcast_addr[sizeof (ether_addr_t)] = {
1399                             (char)0xff, (char)0xff, (char)0xff,
1400                             (char)0xff, (char)0xff, (char)0xff };
1401                 if (bcmp(h->aoeh_dst, bcast_addr, sizeof (ether_addr_t)) != 0) {
1402                         dev_err(sc->dev, CE_WARN,
1403                             "unsupported AoE frame with verfl %d tag %d cmd %d "
1404                             "error packet from %d.%d", h->aoeh_verfl, n,
1405                             h->aoeh_cmd, ntohs(h->aoeh_major), h->aoeh_minor);
1406                 }
1407                 goto release_exit;
1408         }
1409 
1410         if (h->aoeh_verfl & AOEFL_ERR) {
1411                 n = h->aoeh_err;
1412                 if (n > NECODES)
1413                         n = 0;
1414                 dev_err(sc->dev, CE_WARN,
1415                     "error packet from %d.%d; ecode=%d '%s'\n",
1416                     ntohs(h->aoeh_major), h->aoeh_minor,
1417                     h->aoeh_err, aoe_errlist[n]);
1418                 sc->ks_data->sts_error_packets.value.ui64++;
1419                 goto release_exit;
1420         }
1421 
1422         switch (h->aoeh_cmd) {
1423         case AOECMD_ATA:
1424                 aoeblk_ata_rsp(sc, fin);
1425                 break;
1426         case AOECMD_CFG:
1427                 aoeblk_cfg_rsp(sc, fin);
1428                 break;
1429         case AOECMD_RSV:
1430                 aoeblk_rsv_rsp(sc, fin);
1431                 break;
1432         default:
1433                 dev_err(sc->dev, CE_WARN,
1434                     "unknown cmd %d\n", h->aoeh_cmd);
1435         }
1436 
1437 release_exit:
1438         sc->eport->eport_free_netb(fin->af_netb);
1439         sc->eport->eport_release_frame(fin);
1440 }
1441 
1442 static void
1443 aoeblk_port_event(aoe_eport_t *eport, uint32_t event)
1444 {
1445         aoeblk_softc_t *sc = eport->eport_client_private;
1446         aoedisk_t *d;
1447 
1448         mutex_enter(&sc->bd_mutex);
1449 
1450         for (d = list_head(&sc->bd_list); d; d = list_next(&sc->bd_list, d)) {
1451                 mutex_enter(&d->ad_mutex);
1452                 switch (event) {
1453                 case AOE_NOTIFY_EPORT_LINK_UP:
1454                         aoeblk_unwind(d);
1455                         break;
1456                 case AOE_NOTIFY_EPORT_LINK_DOWN:
1457                         aoeblk_downdisk(d, NULL, 0);
1458                         break;
1459                 default:
1460                         ;
1461                 }
1462                 mutex_exit(&d->ad_mutex);
1463         }
1464 
1465         mutex_exit(&sc->bd_mutex);
1466 }
1467 
1468 void
1469 aoeblk_discover(void *arg)
1470 {
1471         aoeblk_softc_t *sc = arg;
1472         aoedisk_t *d;
1473         int i;
1474 
1475         mutex_enter(&sc->bd_mutex);
1476         if (sc->discovery_timeout_id == 0) {
1477                 mutex_exit(&sc->bd_mutex);
1478                 return;
1479         }
1480         for (d = list_head(&sc->bd_list); d; d = list_next(&sc->bd_list, d)) {
1481                 /*
1482                  * Non-zero ad_status means error, unit not responding
1483                  * and all paths offline, speed up unit failure.
1484                  */
1485                 mutex_enter(&d->ad_mutex);
1486                 if (d->ad_status >= 3 && d->ad_flags & DEVFL_UP) {
1487                         aoeblk_downdisk(d, NULL, 1);
1488                         dev_err(d->dev, CE_WARN, "device %d.%d is not "
1489                             "responding to ping discovery, in-recovery now",
1490                             d->ad_major, d->ad_minor);
1491                 }
1492                 mutex_exit(&d->ad_mutex);
1493         }
1494         for (i = 0; i < sc->eport->eport_mac_cnt; i++) {
1495                 if (sc->bcast_f[i])
1496                         sc->eport->eport_release_frame(sc->bcast_f[i]);
1497                 sc->bcast_f[i] = aoeblk_cfg(sc->eport, i, 0xffff, 0xff);
1498 
1499                 /*
1500                  * A bit of a delay between discovery requests would help
1501                  * to align targets for this port into arrays of active
1502                  * targets with matching shelf.slot.
1503                  */
1504                 delay(1);
1505         }
1506         sc->discovery_timeout_id = timeout(aoeblk_discover, sc,
1507             drv_usectohz(DISCTIMER*1000000));
1508         mutex_exit(&sc->bd_mutex);
1509 }
1510 
1511 static int
1512 aoeblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1513 {
1514         int ret;
1515         aoeblk_softc_t *sc;
1516         aoeblk_stats_t *ks_data;
1517         aoe_client_t client;
1518 
1519         switch (cmd) {
1520         case DDI_ATTACH:
1521                 break;
1522 
1523         case DDI_RESUME:
1524         case DDI_PM_RESUME:
1525                 return (DDI_SUCCESS);
1526 
1527         default:
1528                 dev_err(dip, CE_WARN, "cmd 0x%x not recognized", cmd);
1529                 ret = DDI_FAILURE;
1530                 goto exit;
1531         }
1532 
1533         sc = kmem_zalloc(sizeof (aoeblk_softc_t), KM_SLEEP);
1534         if (sc == NULL) {
1535                 dev_err(dip, CE_WARN, "Cannot allocate softc memory");
1536                 ret = DDI_FAILURE;
1537                 goto exit;
1538         }
1539         ddi_set_driver_private(dip, sc);
1540         sc->dev = dip;
1541 
1542         /*
1543          * This should not normally fail, since we don't use a persistent
1544          * stat. We do it this way to avoid having to test for it at run
1545          * time on the hot path.
1546          */
1547         sc->sc_intrstat = kstat_create("aoeblk", ddi_get_instance(dip),
1548             "intrs", "controller", KSTAT_TYPE_NAMED,
1549             sizeof (aoeblk_stats_t) / sizeof (kstat_named_t),
1550             KSTAT_FLAG_PERSISTENT);
1551         if (sc->sc_intrstat == NULL) {
1552                 dev_err(dip, CE_WARN, "kstat_create failed");
1553                 ret = DDI_FAILURE;
1554                 goto exit_intrstat;
1555         }
1556         ks_data = (aoeblk_stats_t *)sc->sc_intrstat->ks_data;
1557         kstat_named_init(&ks_data->sts_rw_outofmemory,
1558             "total_rw_outofmemory", KSTAT_DATA_UINT64);
1559         kstat_named_init(&ks_data->sts_rw_badoffset,
1560             "total_rw_badoffset", KSTAT_DATA_UINT64);
1561         kstat_named_init(&ks_data->sts_error_packets,
1562             "total_error_packets", KSTAT_DATA_UINT64);
1563         kstat_named_init(&ks_data->sts_unsolicited_packets,
1564             "total_unsolicited_packets", KSTAT_DATA_UINT64);
1565         kstat_named_init(&ks_data->sts_unsolicited_xfers,
1566             "total_unsolicited_xfers", KSTAT_DATA_UINT64);
1567         kstat_named_init(&ks_data->sts_bad_xfers,
1568             "total_bad_xfers", KSTAT_DATA_UINT64);
1569         kstat_named_init(&ks_data->sts_rexmit_packets,
1570             "total_rexmit_packets", KSTAT_DATA_UINT64);
1571         sc->ks_data = ks_data;
1572         sc->sc_intrstat->ks_private = sc;
1573         sc->sc_intrstat->ks_update = aoeblk_ksupdate;
1574         kstat_install(sc->sc_intrstat);
1575 
1576         /*
1577          * Port driver supplies mac_id which is unique per mac (port).
1578          * We are using it to identify "channel" via channelid below.
1579          */
1580         ret = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1581             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "port_id", -1);
1582         if (ret == -1) {
1583                 dev_err(dip, CE_WARN, "get port_id failed");
1584                 goto exit_prop_get;
1585         }
1586 
1587         /*
1588          * Get the configured maximum number of outstanding frames.
1589          */
1590         aoedisk_maxframes = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1591             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "disk-max-frames",
1592             AOEDISK_MAXFRAMES);
1593 
1594         client.ect_channelid = ret;
1595         client.ect_eport_flags = EPORT_FLAG_INI_MODE;
1596         client.ect_rx_frame = aoeblk_rx_frame;
1597         client.ect_port_event = aoeblk_port_event;
1598         client.ect_private_frame_struct_size = sizeof (aoeblk_frame_t);
1599         client.ect_client_port_struct = sc;
1600         sc->eport = aoe_register_client(&client);
1601         if (sc->eport == NULL) {
1602                 dev_err(dip, CE_WARN, "AoE client registration failed");
1603                 ret = DDI_FAILURE;
1604                 goto exit_eport;
1605         }
1606 
1607         mutex_init(&sc->bd_mutex, NULL, MUTEX_DRIVER, NULL);
1608         list_create(&sc->bd_list, sizeof (aoedisk_t),
1609             offsetof(aoedisk_t, node));
1610 
1611         /*
1612          * This will activate RX path
1613          */
1614         sc->eport->eport_ctl(sc->eport, NULL, AOE_CMD_PORT_ONLINE, NULL);
1615 
1616         sc->discovery_timeout_id = timeout(aoeblk_discover, sc,
1617             drv_usectohz(1*1000000));
1618 
1619         return (DDI_SUCCESS);
1620 
1621 exit_eport:
1622 exit_prop_get:
1623         kstat_delete(sc->sc_intrstat);
1624 exit_intrstat:
1625         kmem_free(sc, sizeof (aoeblk_softc_t));
1626 exit:
1627         return (ret);
1628 }
1629 
1630 static int
1631 aoeblk_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1632 {
1633         aoeblk_softc_t *sc = ddi_get_driver_private(dip);
1634         aoedisk_t *d;
1635         int i;
1636 
1637         switch (cmd) {
1638         case DDI_DETACH:
1639                 break;
1640 
1641         case DDI_SUSPEND:
1642         case DDI_PM_SUSPEND:
1643                 return (DDI_SUCCESS);
1644 
1645         default:
1646                 dev_err(dip, CE_WARN, "cmd 0x%x unrecognized", cmd);
1647                 return (DDI_FAILURE);
1648         }
1649 
1650         mutex_enter(&sc->bd_mutex);
1651 
1652         if (sc->discovery_timeout_id != 0) {
1653                 while (untimeout(sc->discovery_timeout_id) == -1)
1654                         ;
1655                 sc->discovery_timeout_id = 0;
1656         }
1657 
1658         for (i = 0; i < sc->eport->eport_mac_cnt; i++) {
1659                 if (sc->bcast_f[i])
1660                         sc->eport->eport_release_frame(sc->bcast_f[i]);
1661         }
1662 
1663         while (!list_empty(&sc->bd_list)) {
1664                 d = list_head(&sc->bd_list);
1665 
1666                 mutex_enter(&d->ad_mutex);
1667                 atomic_and_32(&d->ad_flags, ~DEVFL_OPEN);
1668 
1669                 aoeblk_downdisk(d, NULL, 0);
1670 
1671                 if (d->rexmit_timeout_id != 0) {
1672                         (void) untimeout(d->rexmit_timeout_id);
1673                         d->rexmit_timeout_id = 0;
1674                 }
1675 
1676                 while (!list_empty(&d->frames)) {
1677                         aoeblk_frame_t *f = list_head(&d->frames);
1678                         aoe_frame_t *af = PRIV2FRM(f);
1679                         list_remove(&d->frames, f);
1680                         d->eport->eport_release_frame(af);
1681                 }
1682 
1683                 mutex_exit(&d->ad_mutex);
1684 
1685                 list_remove(&sc->bd_list, d);
1686                 kmem_free(d, sizeof (aoedisk_t));
1687         }
1688 
1689         mutex_exit(&sc->bd_mutex);
1690 
1691         sc->eport->eport_deregister_client(sc->eport);
1692 
1693         /* Note: we let aoe driver to detach us */
1694         kstat_delete(sc->sc_intrstat);
1695         kmem_free(sc, sizeof (aoeblk_softc_t));
1696 
1697         return (DDI_SUCCESS);
1698 }
1699 
1700 int
1701 _init(void)
1702 {
1703         int     rv;
1704 
1705         bd_mod_init(&aoeblk_dev_ops);
1706 
1707         if ((rv = mod_install(&modlinkage)) != 0) {
1708                 bd_mod_fini(&aoeblk_dev_ops);
1709         }
1710 
1711         return (rv);
1712 }
1713 
1714 int
1715 _fini(void)
1716 {
1717         int     rv;
1718 
1719         if ((rv = mod_remove(&modlinkage)) == 0) {
1720                 bd_mod_fini(&aoeblk_dev_ops);
1721         }
1722 
1723         return (rv);
1724 }
1725 
1726 int
1727 _info(struct modinfo *modinfop)
1728 {
1729 
1730         return (mod_info(&modlinkage, modinfop));
1731 }