/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * ZFS syseventd module.
 *
 * The purpose of this module is to identify when devices are added to the
 * system, and appropriately online or replace the affected vdevs.
 *
 * When a device is added to the system:
 *
 *      1. Search for any vdevs whose devid matches that of the newly added
 *         device.
 *
 *      2. If no vdevs are found, then search for any vdevs whose devfs path
 *         matches that of the new device.
 *
 *      3. If no vdevs match by either method, then ignore the event.
 *
 *      4. Attempt to online the device with a flag to indicate that it should
 *         be unspared when resilvering completes.  If this succeeds, then the
 *         same device was inserted and we should continue normally.
 *
 *      5. If the pool does not have the 'autoreplace' property set, attempt to
 *         online the device again without the unspare flag, which will
 *         generate an FMA fault.
 *
 *      6. If the pool has the 'autoreplace' property set, and the matching vdev
 *         is a whole disk, then label the new disk and attempt a 'zpool
 *         replace'.
 *
 * The module responds to EC_DEV_ADD events for both disks and lofi devices,
 * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
 * indicates that a device failed to open during pool load, but the autoreplace
 * property was set.  In this case, we deferred the associated FMA fault until
 * our module had a chance to process the autoreplace logic.  If the device
 * could not be replaced, then the second online attempt will trigger the FMA
 * fault that we skipped earlier.
 */

#include <alloca.h>
#include <devid.h>
#include <fcntl.h>
#include <libnvpair.h>
#include <libsysevent.h>
#include <libzfs.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <sys/list.h>
#include <sys/sunddi.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <thread_pool.h>
#include <unistd.h>
#include "syseventd.h"

#if defined(__i386) || defined(__amd64)
#define PHYS_PATH       ":q"
#define RAW_SLICE       "p0"
#elif defined(__sparc)
#define PHYS_PATH       ":c"
#define RAW_SLICE       "s2"
#else
#error Unknown architecture
#endif

typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

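/*
 * Module-global state: the libzfs handle used for all pool operations, the
 * list of pools found unavailable during initial enumeration, the thread
 * pool used to re-enable datasets once those pools become available, a flag
 * indicating that enumeration has completed, and the enumeration thread id.
 */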
libzfs_handle_t *g_zfshdl;
list_t g_pool_list;
tpool_t *g_tpool;
boolean_t g_enumeration_done;
thread_t g_zfs_tid;

typedef struct unavailpool {
        zpool_handle_t  *uap_zhp;
        list_node_t     uap_node;
} unavailpool_t;

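/*
 * Return the state of the root vdev for the given pool, as recorded in the
 * vdev stats of the cached pool configuration.
 */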
int
zfs_toplevel_state(zpool_handle_t *zhp)
{
        nvlist_t *nvroot;
        vdev_stat_t *vs;
        unsigned int c;

        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
        verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
            (uint64_t **)&vs, &c) == 0);
        return (vs->vs_state);
}

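/*
 * zpool_iter() callback used during initial enumeration: pools whose root
 * vdev is below VDEV_STATE_DEGRADED are queued on the list passed in 'data'
 * (keeping the pool handle open); healthy pools are closed and skipped.
 */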
static int
zfs_unavail_pool(zpool_handle_t *zhp, void *data)
{
        if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
                unavailpool_t *uap;
                uap = malloc(sizeof (unavailpool_t));
                uap->uap_zhp = zhp;
                list_insert_tail((list_t *)data, uap);
        } else {
                zpool_close(zhp);
        }
        return (0);
}

/*
 * The device associated with the given vdev (either by devid or physical path)
 * has been added to the system.  If 'isdisk' is set, then we only attempt a
 * replacement if it's a whole disk.  This also implies that we should label the
 * disk first.
 *
 * First, we attempt to online the device (making sure to undo any spare
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
 * but that the label was not what we expected.  If the 'autoreplace' property
 * is set, then we relabel the disk (if specified), and attempt a 'zpool
 * replace'.  If the online is successful, but the new state is something else
 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
 * race, and we should avoid attempting to relabel the disk.
 */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
{
        char *path;
        vdev_state_t newstate;
        nvlist_t *nvroot, *newvd;
        uint64_t wholedisk = 0ULL;
        uint64_t offline = 0ULL;
        char *physpath = NULL;
        char rawpath[PATH_MAX], fullpath[PATH_MAX];
        zpool_boot_label_t boot_type;
        uint64_t boot_size;
        size_t len;

        if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
                return;

        (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);

        /*
         * We should have a way to online a device by guid.  With the current
         * interface, we are forced to chop off the 's0' for whole disks.
         */
        (void) strlcpy(fullpath, path, sizeof (fullpath));
        if (wholedisk)
                fullpath[strlen(fullpath) - 2] = '\0';

        /*
         * Attempt to online the device.  It would be nice to online this by
         * GUID, but the current interface only supports lookup by path.
         */
        if (offline ||
            (zpool_vdev_online(zhp, fullpath,
            ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
            (newstate == VDEV_STATE_HEALTHY ||
            newstate == VDEV_STATE_DEGRADED)))
                return;

        /*
         * If the pool doesn't have the autoreplace property set (or this is a
         * disk event for a vdev that is not a whole disk), then attempt a true
         * online (without the unspare flag), which will trigger an FMA fault.
         */
        if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
            (isdisk && !wholedisk)) {
                (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
                    &newstate);
                return;
        }

        if (isdisk) {
                /*
                 * If this is a request to label a whole disk, then attempt to
                 * write out the label.  Before we can label the disk, we need
                 * access to a raw node.  Ideally, we'd like to walk the devinfo
                 * tree and find a raw node from the corresponding parent node.
                 * This is overly complicated, and since we know how we labeled
                 * this device in the first place, we know it's safe to switch
                 * from /dev/dsk to /dev/rdsk and append the backup slice.
                 *
                 * If any part of this process fails, then do a force online to
                 * trigger a ZFS fault for the device (and any hot spare
                 * replacement).
                 */
                if (strncmp(path, ZFS_DISK_ROOTD,
                    strlen(ZFS_DISK_ROOTD)) != 0) {
                        (void) zpool_vdev_online(zhp, fullpath,
                            ZFS_ONLINE_FORCEFAULT, &newstate);
                        return;
                }

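                /*
                 * Strip the leading "/dev/dsk/" (ZFS_DISK_ROOTD) and the
                 * trailing slice suffix, leaving the bare device name that is
                 * passed to zpool_label_disk() below.
                 */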
                (void) strlcpy(rawpath, path + 9, sizeof (rawpath));
                len = strlen(rawpath);
                rawpath[len - 2] = '\0';

                if (zpool_is_bootable(zhp))
                        boot_type = ZPOOL_COPY_BOOT_LABEL;
                else
                        boot_type = ZPOOL_NO_BOOT_LABEL;

                boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
                if (zpool_label_disk(g_zfshdl, zhp, rawpath,
                    boot_type, boot_size, NULL) != 0) {
                        (void) zpool_vdev_online(zhp, fullpath,
                            ZFS_ONLINE_FORCEFAULT, &newstate);
                        return;
                }
        }

        /*
         * Construct the root vdev to pass to zpool_vdev_attach().  While adding
         * the entire vdev structure is harmless, we construct a reduced set of
         * path/physpath/wholedisk to keep it simple.
         */
        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
                return;

        if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
                nvlist_free(nvroot);
                return;
        }

        if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
            nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
            (physpath != NULL && nvlist_add_string(newvd,
            ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
            nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
            nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
            nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
            1) != 0) {
                nvlist_free(newvd);
                nvlist_free(nvroot);
                return;
        }

        nvlist_free(newvd);

        (void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

        nvlist_free(nvroot);
}

/*
 * Utility functions to find a vdev matching given criteria.
 */
typedef struct dev_data {
        const char              *dd_compare;
        const char              *dd_prop;
        zfs_process_func_t      dd_func;
        boolean_t               dd_found;
        boolean_t               dd_isdisk;
        uint64_t                dd_pool_guid;
        uint64_t                dd_vdev_guid;
} dev_data_t;

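/*
 * Recursively walk a vdev tree, invoking dd_func on each leaf vdev that
 * matches the criteria in 'data': an exact vdev GUID, a prefix match on the
 * nvlist property named by dd_prop (devid or physical path), or every leaf
 * when no criteria are given.
 */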
static void
zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
{
        dev_data_t *dp = data;
        char *path;
        uint_t c, children;
        nvlist_t **child;
        size_t len;
        uint64_t guid;

        /*
         * First iterate over any children.
         */
        if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
            &child, &children) == 0) {
                for (c = 0; c < children; c++)
                        zfs_iter_vdev(zhp, child[c], data);
                return;
        }

        if (dp->dd_vdev_guid != 0) {
                if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
                    &guid) != 0 || guid != dp->dd_vdev_guid)
                        return;
        } else if (dp->dd_compare != NULL) {
                len = strlen(dp->dd_compare);

                if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
                    strncmp(dp->dd_compare, path, len) != 0)
                        return;

                /*
                 * Normally, we want to have an exact match for the comparison
                 * string.  However, we allow substring matches in the following
                 * cases:
                 *
                 *      <path>:         This is a devpath, and the target is
                 *                      one of its children.
                 *
                 *      <path/>         This is a devid for a whole disk, and
                 *                      the target is one of its children.
                 */
                if (path[len] != '\0' && path[len] != ':' &&
                    path[len - 1] != '/')
                        return;
        }

        (dp->dd_func)(zhp, nvl, dp->dd_isdisk);
}

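/*
 * Thread-pool task: mount and share the datasets of a pool that has become
 * available again, then release the pool handle and the list entry.
 */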
void
zfs_enable_ds(void *arg)
{
        unavailpool_t *pool = (unavailpool_t *)arg;

        (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
        zpool_close(pool->uap_zhp);
        free(pool);
}

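/*
 * zpool_iter() callback: walk this pool's vdev tree using the search criteria
 * in 'data' (restricted to dd_pool_guid if one is set).  Once the initial
 * enumeration has finished, also check whether a pool previously recorded as
 * unavailable has come back to at least DEGRADED and, if so, hand it to the
 * thread pool so its datasets can be re-enabled.
 */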
static int
zfs_iter_pool(zpool_handle_t *zhp, void *data)
{
        nvlist_t *config, *nvl;
        dev_data_t *dp = data;
        uint64_t pool_guid;
        unavailpool_t *pool;

        if ((config = zpool_get_config(zhp, NULL)) != NULL) {
                if (dp->dd_pool_guid == 0 ||
                    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
                    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
                        (void) nvlist_lookup_nvlist(config,
                            ZPOOL_CONFIG_VDEV_TREE, &nvl);
                        zfs_iter_vdev(zhp, nvl, data);
                }
        }
        if (g_enumeration_done) {
                for (pool = list_head(&g_pool_list); pool != NULL;
                    pool = list_next(&g_pool_list, pool)) {

                        if (strcmp(zpool_get_name(zhp),
                            zpool_get_name(pool->uap_zhp)))
                                continue;
                        if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
                                list_remove(&g_pool_list, pool);
                                (void) tpool_dispatch(g_tpool, zfs_enable_ds,
                                    pool);
                                break;
                        }
                }
        }

        zpool_close(zhp);
        return (0);
}

/*
 * Given a physical device path, iterate over all (pool, vdev) pairs which
 * correspond to the given path.
 */
static boolean_t
devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
{
        dev_data_t data = { 0 };

        data.dd_compare = devpath;
        data.dd_func = func;
        data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
        data.dd_found = B_FALSE;
        data.dd_isdisk = wholedisk;

        (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);

        return (data.dd_found);
}

/*
 * Given a /devices path, lookup the corresponding devid for each minor node,
 * and find any vdevs with matching devids.  Doing this straight up would be
 * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
 * the fact that each devid ends with "/<minornode>".  Once we find any valid
 * minor node, we chop off the portion after the last slash, and then search for
 * matching vdevs, which is O(vdevs in system).
 */
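/*
 * For example (hypothetical devid): if a vdev's stored devid string is
 * "id1,sd@EXAMPLE_DISK/a", then the search prefix constructed below is
 * "id1,sd@EXAMPLE_DISK/", which prefix-matches the devid of every minor
 * node on that disk.
 */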
static boolean_t
devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
{
        size_t len = strlen(devpath) + sizeof ("/devices") +
            sizeof (PHYS_PATH) - 1;
        char *fullpath;
        int fd;
        ddi_devid_t devid;
        char *devidstr, *fulldevid;
        dev_data_t data = { 0 };

        /*
         * Try to open a known minor node.
         */
        fullpath = alloca(len);
        (void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
        if ((fd = open(fullpath, O_RDONLY)) < 0)
                return (B_FALSE);

        /*
         * Determine the devid as a string, with no trailing slash for the minor
         * node.
         */
        if (devid_get(fd, &devid) != 0) {
                (void) close(fd);
                return (B_FALSE);
        }
        (void) close(fd);

        if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
                devid_free(devid);
                return (B_FALSE);
        }

        len = strlen(devidstr) + 2;
        fulldevid = alloca(len);
        (void) snprintf(fulldevid, len, "%s/", devidstr);

        data.dd_compare = fulldevid;
        data.dd_func = func;
        data.dd_prop = ZPOOL_CONFIG_DEVID;
        data.dd_found = B_FALSE;
        data.dd_isdisk = wholedisk;

        (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);

        devid_str_free(devidstr);
        devid_free(devid);

        return (data.dd_found);
}

/*
 * This function is called when we receive a devfs add event.  This can be
 * either a disk event or a lofi event, and the behavior is slightly different
 * depending on which it is.
 */
static int
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
{
        char *devpath, *devname;
        char path[PATH_MAX], realpath[PATH_MAX];
        char *colon, *raw;
        int ret;

        /*
         * The main unit of operation is the physical device path.  For disks,
         * this is the device node, as all minor nodes are affected.  For lofi
         * devices, this includes the minor path.  Unfortunately, this isn't
         * represented in the DEV_PHYS_PATH for various reasons.
         */
        if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
                return (-1);

        /*
         * If this is a lofi device, then also get the minor instance name.
         * Unfortunately, the current payload doesn't include an easy way to get
         * this information.  So we cheat by resolving the 'dev_name' (which
         * refers to the raw device) and taking the portion between the ':' and
         * the ',raw' suffix.
         */
        (void) strlcpy(realpath, devpath, sizeof (realpath));
        if (is_lofi) {
                if (nvlist_lookup_string(nvl, DEV_NAME,
                    &devname) == 0 &&
                    (ret = resolvepath(devname, path,
                    sizeof (path))) > 0) {
                        path[ret] = '\0';
                        colon = strchr(path, ':');
                        if (colon != NULL)
                                raw = strstr(colon + 1, ",raw");
                        if (colon != NULL && raw != NULL) {
                                *raw = '\0';
                                (void) snprintf(realpath,
                                    sizeof (realpath), "%s%s",
                                    devpath, colon);
                                *raw = ',';
                        }
                }
        }

        /*
         * Iterate over all vdevs with a matching devid, and then those with a
         * matching /devices path.  For disks, we only want to pay attention to
         * vdevs marked as whole disks.  For lofi, we don't care (because we're
         * matching an exact minor name).
         */
        if (!devid_iter(realpath, zfs_process_add, !is_lofi))
                (void) devpath_iter(realpath, zfs_process_add, !is_lofi);

        return (0);
}

/*
 * Called when we receive a VDEV_CHECK event, which indicates a device could not
 * be opened during initial pool open, but the autoreplace property was set on
 * the pool.  In this case, we treat it as if it were an add event.
 */
static int
zfs_deliver_check(nvlist_t *nvl)
{
        dev_data_t data = { 0 };

        if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
            &data.dd_pool_guid) != 0 ||
            nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
            &data.dd_vdev_guid) != 0 ||
            data.dd_vdev_guid == 0)
                return (0);

        data.dd_isdisk = B_TRUE;
        data.dd_func = zfs_process_add;

        (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);

        return (0);
}

#define DEVICE_PREFIX   "/devices"

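/*
 * zpool_iter() callback for dynamic LUN expansion (DLE) events: look up the
 * vdev with the given physical path, reopen the pool for whole disks so the
 * kernel picks up the new device size, and online the device if the pool has
 * the 'autoexpand' property set.  Returns 1 to stop iteration once the device
 * has been found.
 */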
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
        char *devname = data;
        boolean_t avail_spare, l2cache;
        vdev_state_t newstate;
        nvlist_t *tgt;

        syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
            devname, zpool_get_name(zhp));

        if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
            &avail_spare, &l2cache, NULL)) != NULL) {
                char *path, fullpath[MAXPATHLEN];
                uint64_t wholedisk = 0ULL;

                verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
                    &path) == 0);
                verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
                    &wholedisk) == 0);

                (void) strlcpy(fullpath, path, sizeof (fullpath));
                if (wholedisk) {
                        fullpath[strlen(fullpath) - 2] = '\0';

                        /*
                         * We need to reopen the pool associated with this
                         * device so that the kernel can update the size
                         * of the expanded device.
                         */
                        (void) zpool_reopen(zhp);
                }

                if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
                        syseventd_print(9, "zfsdle_vdev_online: setting "
                            "device %s to ONLINE state in pool %s.\n",
                            fullpath, zpool_get_name(zhp));
                        if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
                                (void) zpool_vdev_online(zhp, fullpath, 0,
                                    &newstate);
                }
                zpool_close(zhp);
                return (1);
        }
        zpool_close(zhp);
        return (0);
}

/*
 * This function is called for each vdev of a pool for which any of the
 * following events was received:
 *  - ESC_ZFS_vdev_add
 *  - ESC_ZFS_vdev_attach
 *  - ESC_ZFS_vdev_clear
 *  - ESC_ZFS_vdev_online
 *  - ESC_ZFS_pool_create
 *  - ESC_ZFS_pool_import
 * It will update the vdev's FRU property if it is out of date.
 */
/*ARGSUSED2*/
static void
zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
{
        char *devpath, *cptr, *oldfru = NULL;
        const char *newfru;
        uint64_t vdev_guid;

        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
        (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &devpath);
        (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);

        /* remove :<slice> from devpath */
        cptr = strrchr(devpath, ':');
        if (cptr != NULL)
                *cptr = '\0';

        newfru = libzfs_fru_lookup(g_zfshdl, devpath);
        if (newfru == NULL) {
                syseventd_print(9, "zfs_update_vdev_fru: no FRU for %s\n",
                    devpath);
                return;
        }

        /* do nothing if the FRU hasn't changed */
        if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
                syseventd_print(9, "zfs_update_vdev_fru: FRU unchanged\n");
                return;
        }

        syseventd_print(9, "zfs_update_vdev_fru: devpath = %s\n", devpath);
        syseventd_print(9, "zfs_update_vdev_fru: FRU = %s\n", newfru);

        (void) zpool_fru_set(zhp, vdev_guid, newfru);
}

/*
 * This function handles the following events:
 *  - ESC_ZFS_vdev_add
 *  - ESC_ZFS_vdev_attach
 *  - ESC_ZFS_vdev_clear
 *  - ESC_ZFS_vdev_online
 *  - ESC_ZFS_pool_create
 *  - ESC_ZFS_pool_import
 * It will iterate over the pool's vdevs to update the FRU property.
 */
int
zfs_deliver_update(nvlist_t *nvl)
{
        dev_data_t dd = { 0 };
        char *pname;
        zpool_handle_t *zhp;
        nvlist_t *config, *vdev;

        if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
                syseventd_print(9, "zfs_deliver_update: no pool name\n");
                return (-1);
        }

        /*
         * If this event was triggered by a pool export or destroy we cannot
         * open the pool. This is not an error, just return 0 as we don't care
         * about these events.
         */
        zhp = zpool_open_canfail(g_zfshdl, pname);
        if (zhp == NULL)
                return (0);

        config = zpool_get_config(zhp, NULL);
        if (config == NULL) {
                syseventd_print(9, "zfs_deliver_update: "
                    "failed to get pool config for %s\n", pname);
                zpool_close(zhp);
                return (-1);
        }

        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
                syseventd_print(0, "zfs_deliver_update: "
                    "failed to get vdev tree for %s\n", pname);
                zpool_close(zhp);
                return (-1);
        }

        libzfs_fru_refresh(g_zfshdl);

        dd.dd_func = zfs_update_vdev_fru;
        zfs_iter_vdev(zhp, vdev, &dd);

        zpool_close(zhp);
        return (0);
}

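/*
 * Handle an EC_DEV_STATUS/ESC_DEV_DLE (dynamic LUN expansion) event by
 * locating the affected vdev via its physical path and onlining/expanding it.
 */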
int
zfs_deliver_dle(nvlist_t *nvl)
{
        char *devname;
        if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
                syseventd_print(9, "zfs_deliver_dle: no physpath\n");
                return (-1);
        }
        if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
                syseventd_print(9, "zfs_deliver_dle: invalid "
                    "device '%s'\n", devname);
                return (-1);
        }

        /*
         * We try to find the device using the physical path that has been
         * supplied.  We need to strip off the /devices prefix before starting
         * our search.
         */
        devname += strlen(DEVICE_PREFIX);
        if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
                syseventd_print(9, "zfs_deliver_dle: device '%s' not"
                    " found\n", devname);
                return (1);
        }
        return (0);
}

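/*
 * syseventd entry point for this module: classify the event by class and
 * subclass, extract its attribute list, and dispatch to the appropriate
 * handler (device add, vdev check, FRU update, or DLE).
 */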
/*ARGSUSED*/
static int
zfs_deliver_event(sysevent_t *ev, int unused)
{
        const char *class = sysevent_get_class_name(ev);
        const char *subclass = sysevent_get_subclass_name(ev);
        nvlist_t *nvl;
        int ret;
        boolean_t is_lofi = B_FALSE, is_check = B_FALSE;
        boolean_t is_dle = B_FALSE, is_update = B_FALSE;

        if (strcmp(class, EC_DEV_ADD) == 0) {
                /*
                 * We're mainly interested in disk additions, but we also listen
                 * for new lofi devices, to allow for simplified testing.
                 */
                if (strcmp(subclass, ESC_DISK) == 0)
                        is_lofi = B_FALSE;
                else if (strcmp(subclass, ESC_LOFI) == 0)
                        is_lofi = B_TRUE;
                else
                        return (0);

                is_check = B_FALSE;
        } else if (strcmp(class, EC_ZFS) == 0) {
                if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
                        /*
                         * This event signifies that a device failed to open
                         * during pool load, but the 'autoreplace' property was
                         * set, so we should pretend it's just been added.
                         */
                        is_check = B_TRUE;
                } else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
                    (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
                    (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
                    (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
                    (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
                    (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
                        /*
                         * When we receive these events we check the pool
                         * configuration and update the vdev FRUs if necessary.
                         */
                        is_update = B_TRUE;
                }
        } else if (strcmp(class, EC_DEV_STATUS) == 0 &&
            strcmp(subclass, ESC_DEV_DLE) == 0) {
                is_dle = B_TRUE;
        } else {
                return (0);
        }

        if (sysevent_get_attr_list(ev, &nvl) != 0)
                return (-1);

        if (is_dle)
                ret = zfs_deliver_dle(nvl);
        else if (is_update)
                ret = zfs_deliver_update(nvl);
        else if (is_check)
                ret = zfs_deliver_check(nvl);
        else
                ret = zfs_deliver_add(nvl, is_lofi);

        nvlist_free(nvl);
        return (ret);
}

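/*
 * Enumeration thread started from slm_init(): build the list of unavailable
 * pools and, if any were found, create the thread pool used to re-enable
 * their datasets later.
 */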
/*ARGSUSED*/
void *
zfs_enum_pools(void *arg)
{
        (void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
        if (!list_is_empty(&g_pool_list))
                g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
                    0, NULL);
        g_enumeration_done = B_TRUE;
        return (NULL);
}

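/*
 * Module ops vector returned to syseventd by slm_init(); zfs_deliver_event()
 * is the per-event callback.
 */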
static struct slm_mod_ops zfs_mod_ops = {
        SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
};

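/*
 * Module load entry point: open a libzfs handle and kick off the asynchronous
 * pool enumeration before handing the ops vector back to syseventd.
 */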
struct slm_mod_ops *
slm_init()
{
        if ((g_zfshdl = libzfs_init()) == NULL)
                return (NULL);
        /*
         * collect a list of unavailable pools (asynchronously,
         * since this can take a while)
         */
        list_create(&g_pool_list, sizeof (struct unavailpool),
            offsetof(struct unavailpool, uap_node));
        if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
                return (NULL);
        return (&zfs_mod_ops);
}

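/*
 * Module unload entry point: wait for the enumeration thread, drain the
 * thread pool, and release any pool handles still on the unavailable list.
 */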
void
slm_fini()
{
        unavailpool_t *pool;

        (void) thr_join(g_zfs_tid, NULL, NULL);
        if (g_tpool != NULL) {
                tpool_wait(g_tpool);
                tpool_destroy(g_tpool);
        }
        while ((pool = (list_head(&g_pool_list))) != NULL) {
                list_remove(&g_pool_list, pool);
                zpool_close(pool->uap_zhp);
                free(pool);
        }
        list_destroy(&g_pool_list);
        libzfs_fini(g_zfshdl);
}