1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * ZFS syseventd module.
29 *
30 * The purpose of this module is to identify when devices are added to the
31 * system, and appropriately online or replace the affected vdevs.
32 *
33 * When a device is added to the system:
34 *
35 * 1. Search for any vdevs whose devid matches that of the newly added
36 * device.
37 *
38 * 2. If no vdevs are found, then search for any vdevs whose devfs path
39 * matches that of the new device.
40 *
41 * 3. If no vdevs match by either method, then ignore the event.
42 *
43 * 4. Attempt to online the device with a flag to indicate that it should
44 * be unspared when resilvering completes. If this succeeds, then the
45 * same device was inserted and we should continue normally.
46 *
47 * 5. If the pool does not have the 'autoreplace' property set, attempt to
48 * online the device again without the unspare flag, which will
49 * generate a FMA fault.
50 *
51 * 6. If the pool has the 'autoreplace' property set, and the matching vdev
52 * is a whole disk, then label the new disk and attempt a 'zpool
53 * replace'.
54 *
55 * The module responds to EC_DEV_ADD events for both disks and lofi devices,
56 * with the latter used for testing. The special ESC_ZFS_VDEV_CHECK event
57 * indicates that a device failed to open during pool load, but the autoreplace
58 * property was set. In this case, we deferred the associated FMA fault until
59 * our module had a chance to process the autoreplace logic. If the device
60 * could not be replaced, then the second online attempt will trigger the FMA
61 * fault that we skipped earlier.
62 */
63
64 #include <alloca.h>
65 #include <devid.h>
66 #include <fcntl.h>
67 #include <libnvpair.h>
68 #include <libsysevent.h>
69 #include <libzfs.h>
70 #include <limits.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <syslog.h>
74 #include <sys/list.h>
75 #include <sys/sunddi.h>
76 #include <sys/sysevent/eventdefs.h>
77 #include <sys/sysevent/dev.h>
78 #include <thread_pool.h>
79 #include <unistd.h>
80 #include "syseventd.h"
81
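/*
 * PHYS_PATH is the minor-node suffix used to open the whole-disk device
 * node under /devices ("q" maps to p0 on x86, "c" to the s2 backup
 * slice on SPARC); RAW_SLICE names the matching raw whole-disk slice.
 */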
82 #if defined(__i386) || defined(__amd64)
83 #define PHYS_PATH ":q"
84 #define RAW_SLICE "p0"
85 #elif defined(__sparc)
86 #define PHYS_PATH ":c"
87 #define RAW_SLICE "s2"
88 #else
89 #error Unknown architecture
90 #endif
91
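/* Callback invoked for each vdev that matches the search criteria */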
92 typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
93
94 libzfs_handle_t *g_zfshdl;
95 list_t g_pool_list;
96 tpool_t *g_tpool;
97 boolean_t g_enumeration_done;
98 thread_t g_zfs_tid;
99
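/*
 * Pools whose top-level vdev was unavailable when first enumerated are
 * kept on g_pool_list; their datasets are mounted later by
 * zfs_enable_ds() once the pool becomes healthy (see zfs_iter_pool()).
 */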
100 typedef struct unavailpool {
101 zpool_handle_t *uap_zhp;
102 list_node_t uap_node;
103 } unavailpool_t;
104
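/* Return the state of the pool's top-level (root) vdev */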
105 int
106 zfs_toplevel_state(zpool_handle_t *zhp)
107 {
108 nvlist_t *nvroot;
109 vdev_stat_t *vs;
110 unsigned int c;
111
112 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
113 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
114 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
115 (uint64_t **)&vs, &c) == 0);
116 return (vs->vs_state);
117 }
118
119 static int
120 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
121 {
122 if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
123 unavailpool_t *uap;
124 uap = malloc(sizeof (unavailpool_t));
125 uap->uap_zhp = zhp;
126 list_insert_tail((list_t *)data, uap);
127 } else {
128 zpool_close(zhp);
129 }
130 return (0);
131 }
132
133 /*
134 * The device associated with the given vdev (either by devid or physical path)
135 * has been added to the system. If 'isdisk' is set, then we only attempt a
136 * replacement if it's a whole disk. This also implies that we should label the
137 * disk first.
138 *
139 * First, we attempt to online the device (making sure to undo any spare
140 * operation when finished). If this succeeds, then we're done. If it fails,
141 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
142 * but that the label was not what we expected. If the 'autoreplace' property
143 * is set, then we relabel the disk (if specified), and attempt a 'zpool
144 * replace'. If the online is successful, but the new state is something else
145 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
146 * race, and we should avoid attempting to relabel the disk.
147 */
148 static void
149 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
150 {
151 char *path;
152 vdev_state_t newstate;
153 nvlist_t *nvroot, *newvd;
154 uint64_t wholedisk = 0ULL;
155 uint64_t offline = 0ULL;
156 char *physpath = NULL;
157 char rawpath[PATH_MAX], fullpath[PATH_MAX];
158 zpool_boot_label_t boot_type;
159 uint64_t boot_size;
160 size_t len;
161
162 if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
163 return;
164
165 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
166 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
167 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
168
169 /*
170 * We should have a way to online a device by guid. With the current
171 * interface, we are forced to chop off the 's0' for whole disks.
172 */
173 (void) strlcpy(fullpath, path, sizeof (fullpath));
174 if (wholedisk)
175 fullpath[strlen(fullpath) - 2] = '\0';
176
177 /*
178 * Attempt to online the device. It would be nice to online this by
179 * GUID, but the current interface only supports lookup by path.
180 */
181 if (offline ||
182 (zpool_vdev_online(zhp, fullpath,
183 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
184 (newstate == VDEV_STATE_HEALTHY ||
185 newstate == VDEV_STATE_DEGRADED)))
186 return;
187
188 /*
189 * If the pool doesn't have the autoreplace property set, then attempt a
190 * true online (without the unspare flag), which will trigger a FMA
191 * fault.
192 */
193 if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
194 (isdisk && !wholedisk)) {
195 (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
196 &newstate);
197 return;
198 }
199
200 if (isdisk) {
201 /*
202 * If this is a request to label a whole disk, then attempt to
203 * write out the label. Before we can label the disk, we need
204 * access to a raw node. Ideally, we'd like to walk the devinfo
205 * tree and find a raw node from the corresponding parent node.
206 * This is overly complicated, and since we know how we labeled
207 * this device in the first place, we know it's safe to switch
208 * from /dev/dsk to /dev/rdsk and append the backup slice.
209 *
210 * If any part of this process fails, then do a force online to
211 * trigger a ZFS fault for the device (and any hot spare
212 * replacement).
213 */
214 if (strncmp(path, ZFS_DISK_ROOTD,
215 strlen(ZFS_DISK_ROOTD)) != 0) {
216 (void) zpool_vdev_online(zhp, fullpath,
217 ZFS_ONLINE_FORCEFAULT, &newstate);
218 return;
219 }
220
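/*
 * Skip the "/dev/dsk/" prefix (9 characters) and chop off the
 * trailing slice so rawpath holds the bare disk name that
 * zpool_label_disk() expects (e.g. "c0t0d0").
 */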
221 (void) strlcpy(rawpath, path + 9, sizeof (rawpath));
222 len = strlen(rawpath);
223 rawpath[len - 2] = '\0';
224
225 if (zpool_is_bootable(zhp))
226 boot_type = ZPOOL_COPY_BOOT_LABEL;
227 else
228 boot_type = ZPOOL_NO_BOOT_LABEL;
229
230 boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
231 if (zpool_label_disk(g_zfshdl, zhp, rawpath,
232 boot_type, boot_size, NULL) != 0) {
233 (void) zpool_vdev_online(zhp, fullpath,
234 ZFS_ONLINE_FORCEFAULT, &newstate);
235 return;
236 }
237 }
238
239 /*
240 * Construct the root vdev to pass to zpool_vdev_attach(). While adding
241 * the entire vdev structure is harmless, we construct a reduced set of
242 * path/physpath/wholedisk to keep it simple.
243 */
244 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
245 return;
246
247 if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
248 nvlist_free(nvroot);
249 return;
250 }
251
252 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
253 nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
254 (physpath != NULL && nvlist_add_string(newvd,
255 ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
256 nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
257 nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
258 nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
259 1) != 0) {
260 nvlist_free(newvd);
261 nvlist_free(nvroot);
262 return;
263 }
264
265 nvlist_free(newvd);
266
267 (void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
268
269 nvlist_free(nvroot);
271 }
272
273 /*
274 * Utility functions to find a vdev matching given criteria.
275 */
276 typedef struct dev_data {
277 const char *dd_compare;
278 const char *dd_prop;
279 zfs_process_func_t dd_func;
280 boolean_t dd_found;
281 boolean_t dd_isdisk;
282 uint64_t dd_pool_guid;
283 uint64_t dd_vdev_guid;
284 } dev_data_t;
285
286 static void
287 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
288 {
289 dev_data_t *dp = data;
290 char *path;
291 uint_t c, children;
292 nvlist_t **child;
293 size_t len;
294 uint64_t guid;
295
296 /*
297 * First iterate over any children.
298 */
299 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
300 &child, &children) == 0) {
301 for (c = 0; c < children; c++)
302 zfs_iter_vdev(zhp, child[c], data);
303 return;
304 }
305
306 if (dp->dd_vdev_guid != 0) {
307 if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
308 &guid) != 0 || guid != dp->dd_vdev_guid)
309 return;
310 } else if (dp->dd_compare != NULL) {
311 len = strlen(dp->dd_compare);
312
313 if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
314 strncmp(dp->dd_compare, path, len) != 0)
315 return;
316
317 /*
318 * Normally, we want to have an exact match for the comparison
319 * string. However, we allow substring matches in the following
320 * cases:
321 *
322 * <path>: This is a devpath, and the target is one
323 * of its children.
324 *
325 * <path/> This is a devid for a whole disk, and
326 * the target is one of its children.
327 */
328 if (path[len] != '\0' && path[len] != ':' &&
329 path[len - 1] != '/')
330 return;
331 }
332
333 (dp->dd_func)(zhp, nvl, dp->dd_isdisk);
334 }
335
336 void
337 zfs_enable_ds(void *arg)
338 {
339 unavailpool_t *pool = (unavailpool_t *)arg;
340
341 (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
342 zpool_close(pool->uap_zhp);
343 free(pool);
344 }
345
346 static int
347 zfs_iter_pool(zpool_handle_t *zhp, void *data)
348 {
349 nvlist_t *config, *nvl;
350 dev_data_t *dp = data;
351 uint64_t pool_guid;
352 unavailpool_t *pool;
353
354 if ((config = zpool_get_config(zhp, NULL)) != NULL) {
355 if (dp->dd_pool_guid == 0 ||
356 (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
357 &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
358 (void) nvlist_lookup_nvlist(config,
359 ZPOOL_CONFIG_VDEV_TREE, &nvl);
360 zfs_iter_vdev(zhp, nvl, data);
361 }
362 }
363 if (g_enumeration_done) {
364 for (pool = list_head(&g_pool_list); pool != NULL;
365 pool = list_next(&g_pool_list, pool)) {
366
367 if (strcmp(zpool_get_name(zhp),
368 zpool_get_name(pool->uap_zhp)))
369 continue;
370 if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
371 list_remove(&g_pool_list, pool);
372 (void) tpool_dispatch(g_tpool, zfs_enable_ds,
373 pool);
374 break;
375 }
376 }
377 }
378
379 zpool_close(zhp);
380 return (0);
381 }
382
383 /*
384 * Given a physical device path, iterate over all (pool, vdev) pairs which
385 * correspond to the given path.
386 */
387 static boolean_t
388 devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
389 {
390 dev_data_t data = { 0 };
391
392 data.dd_compare = devpath;
393 data.dd_func = func;
394 data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
395 data.dd_found = B_FALSE;
396 data.dd_isdisk = wholedisk;
397
398 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
399
400 return (data.dd_found);
401 }
402
403 /*
404 * Given a /devices path, lookup the corresponding devid for each minor node,
405 * and find any vdevs with matching devids. Doing this straight up would be
406 * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
407 * the fact that each devid ends with "/<minornode>". Once we find any valid
408 * minor node, we chop off the portion after the last slash, and then search for
409 * matching vdevs, which is O(vdevs in system).
410 */
411 static boolean_t
412 devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
413 {
414 size_t len = strlen(devpath) + sizeof ("/devices") +
415 sizeof (PHYS_PATH) - 1;
416 char *fullpath;
417 int fd;
418 ddi_devid_t devid;
419 char *devidstr, *fulldevid;
420 dev_data_t data = { 0 };
421
422 /*
423 * Try to open a known minor node.
424 */
425 fullpath = alloca(len);
426 (void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
427 if ((fd = open(fullpath, O_RDONLY)) < 0)
428 return (B_FALSE);
429
430 /*
431 * Determine the devid as a string, with no trailing slash for the minor
432 * node.
433 */
434 if (devid_get(fd, &devid) != 0) {
435 (void) close(fd);
436 return (B_FALSE);
437 }
438 (void) close(fd);
439
440 if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
441 devid_free(devid);
442 return (B_FALSE);
443 }
444
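/*
 * Append a trailing '/' so the comparison string acts as a whole-disk
 * devid prefix; zfs_iter_vdev() will then match any minor node of the
 * same device (see the substring rules there).
 */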
445 len = strlen(devidstr) + 2;
446 fulldevid = alloca(len);
447 (void) snprintf(fulldevid, len, "%s/", devidstr);
448
449 data.dd_compare = fulldevid;
450 data.dd_func = func;
451 data.dd_prop = ZPOOL_CONFIG_DEVID;
452 data.dd_found = B_FALSE;
453 data.dd_isdisk = wholedisk;
454
455 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
456
457 devid_str_free(devidstr);
458 devid_free(devid);
459
460 return (data.dd_found);
461 }
462
463 /*
464 * This function is called when we receive a devfs add event. This can be
465 * either a disk event or a lofi event, and the behavior is slightly different
466 * depending on which it is.
467 */
468 static int
469 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
470 {
471 char *devpath, *devname;
472 char path[PATH_MAX], realpath[PATH_MAX];
473 char *colon, *raw;
474 int ret;
475
476 /*
477 * The main unit of operation is the physical device path. For disks,
478 * this is the device node, as all minor nodes are affected. For lofi
479 * devices, this includes the minor path. Unfortunately, this isn't
480 * represented in the DEV_PHYS_PATH for various reasons.
481 */
482 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
483 return (-1);
484
485 /*
486 * If this is a lofi device, then also get the minor instance name.
487 * Unfortunately, the current payload doesn't include an easy way to get
488 * this information. So we cheat by resolving the 'dev_name' (which
489 * refers to the raw device) and taking the portion between ':' and ',raw'.
490 */
491 (void) strlcpy(realpath, devpath, sizeof (realpath));
492 if (is_lofi) {
493 if (nvlist_lookup_string(nvl, DEV_NAME,
494 &devname) == 0 &&
495 (ret = resolvepath(devname, path,
496 sizeof (path))) > 0) {
497 path[ret] = '\0';
498 colon = strchr(path, ':');
499 if (colon != NULL)
500 raw = strstr(colon + 1, ",raw");
501 if (colon != NULL && raw != NULL) {
502 *raw = '\0';
503 (void) snprintf(realpath,
504 sizeof (realpath), "%s%s",
505 devpath, colon);
506 *raw = ',';
507 }
508 }
509 }
510
511 /*
512 * Iterate over all vdevs with a matching devid, and then those with a
513 * matching /devices path. For disks, we only want to pay attention to
514 * vdevs marked as whole disks. For lofi, we don't care (because we're
515 * matching an exact minor name).
516 */
517 if (!devid_iter(realpath, zfs_process_add, !is_lofi))
518 (void) devpath_iter(realpath, zfs_process_add, !is_lofi);
519
520 return (0);
521 }
522
523 /*
524 * Called when we receive a VDEV_CHECK event, which indicates a device could not
525 * be opened during initial pool open, but the autoreplace property was set on
526 * the pool. In this case, we treat it as if it were an add event.
527 */
528 static int
529 zfs_deliver_check(nvlist_t *nvl)
530 {
531 dev_data_t data = { 0 };
532
533 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
534 &data.dd_pool_guid) != 0 ||
535 nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
536 &data.dd_vdev_guid) != 0 ||
537 data.dd_vdev_guid == 0)
538 return (0);
539
540 data.dd_isdisk = B_TRUE;
541 data.dd_func = zfs_process_add;
542
543 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
544
545 return (0);
546 }
547
548 #define DEVICE_PREFIX "/devices"
549
550 static int
551 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
552 {
553 char *devname = data;
554 boolean_t avail_spare, l2cache;
555 vdev_state_t newstate;
556 nvlist_t *tgt;
557
558 syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
559 devname, zpool_get_name(zhp));
560
561 if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
562 &avail_spare, &l2cache, NULL)) != NULL) {
563 char *path, fullpath[MAXPATHLEN];
564 uint64_t wholedisk = 0ULL;
565
566 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
567 &path) == 0);
568 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
569 &wholedisk) == 0);
570
571 (void) strlcpy(fullpath, path, sizeof (fullpath));
572 if (wholedisk) {
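/* Chop off the slice to get the whole-disk device path */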
573 fullpath[strlen(fullpath) - 2] = '\0';
574
575 /*
576 * We need to reopen the pool associated with this
577 * device so that the kernel can update the size
578 * of the expanded device.
579 */
580 (void) zpool_reopen(zhp);
581 }
582
583 if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
584 syseventd_print(9, "zfsdle_vdev_online: setting device"
585 " device %s to ONLINE state in pool %s.\n",
586 fullpath, zpool_get_name(zhp));
587 if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
588 (void) zpool_vdev_online(zhp, fullpath, 0,
589 &newstate);
590 }
591 zpool_close(zhp);
592 return (1);
593 }
594 zpool_close(zhp);
595 return (0);
596 }
597
598 /*
599 * This function is called for each vdev of a pool for which any of the
600 * following events was received:
601 * - ESC_ZFS_vdev_add
602 * - ESC_ZFS_vdev_attach
603 * - ESC_ZFS_vdev_clear
604 * - ESC_ZFS_vdev_online
605 * - ESC_ZFS_pool_create
606 * - ESC_ZFS_pool_import
607 * It will update the vdevs FRU property if it is out of date.
608 */
609 /*ARGSUSED2*/
610 static void
611 zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
612 {
613 char *devpath, *cptr, *oldfru = NULL;
614 const char *newfru;
615 uint64_t vdev_guid;
616
617 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
618 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &devpath);
619 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);
620
621 /* remove :<slice> from devpath */
622 cptr = strrchr(devpath, ':');
623 if (cptr != NULL)
624 *cptr = '\0';
625
626 newfru = libzfs_fru_lookup(g_zfshdl, devpath);
627 if (newfru == NULL) {
628 syseventd_print(9, "zfs_update_vdev_fru: no FRU for %s\n",
629 devpath);
630 return;
631 }
632
633 /* do nothing if the FRU hasn't changed */
634 if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
635 syseventd_print(9, "zfs_update_vdev_fru: FRU unchanged\n");
636 return;
637 }
638
639 syseventd_print(9, "zfs_update_vdev_fru: devpath = %s\n", devpath);
640 syseventd_print(9, "zfs_update_vdev_fru: FRU = %s\n", newfru);
641
642 (void) zpool_fru_set(zhp, vdev_guid, newfru);
643 }
644
645 /*
646 * This function handles the following events:
647 * - ESC_ZFS_vdev_add
648 * - ESC_ZFS_vdev_attach
649 * - ESC_ZFS_vdev_clear
650 * - ESC_ZFS_vdev_online
651 * - ESC_ZFS_pool_create
652 * - ESC_ZFS_pool_import
653 * It will iterate over the pool vdevs to update the FRU property.
654 */
655 int
656 zfs_deliver_update(nvlist_t *nvl)
657 {
658 dev_data_t dd = { 0 };
659 char *pname;
660 zpool_handle_t *zhp;
661 nvlist_t *config, *vdev;
662
663 if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
664 syseventd_print(9, "zfs_deliver_update: no pool name\n");
665 return (-1);
666 }
667
668 /*
669 * If this event was triggered by a pool export or destroy we cannot
670 * open the pool. This is not an error, just return 0 as we don't care
671 * about these events.
672 */
673 zhp = zpool_open_canfail(g_zfshdl, pname);
674 if (zhp == NULL)
675 return (0);
676
677 config = zpool_get_config(zhp, NULL);
678 if (config == NULL) {
679 syseventd_print(9, "zfs_deliver_update: "
680 "failed to get pool config for %s\n", pname);
681 zpool_close(zhp);
682 return (-1);
683 }
684
685 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
686 syseventd_print(0, "zfs_deliver_update: "
687 "failed to get vdev tree for %s\n", pname);
688 zpool_close(zhp);
689 return (-1);
690 }
691
692 libzfs_fru_refresh(g_zfshdl);
693
694 dd.dd_func = zfs_update_vdev_fru;
695 zfs_iter_vdev(zhp, vdev, &dd);
696
697 zpool_close(zhp);
698 return (0);
699 }
700
701 int
702 zfs_deliver_dle(nvlist_t *nvl)
703 {
704 char *devname;
705 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
706 syseventd_print(9, "zfs_deliver_dle: no physpath\n");
707 return (-1);
708 }
709 if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
710 syseventd_print(9, "zfs_deliver_dle: invalid "
711 "device '%s'\n", devname);
712 return (-1);
713 }
714
715 /*
716 * We try to find the device using the physical
717 * path that has been supplied. We need to strip off
718 * the /devices prefix before starting our search.
719 */
720 devname += strlen(DEVICE_PREFIX);
721 if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
722 syseventd_print(9, "zfs_deliver_dle: device '%s' not"
723 " found\n", devname);
724 return (1);
725 }
726 return (0);
727 }
728
729
730 /*ARGSUSED*/
731 static int
732 zfs_deliver_event(sysevent_t *ev, int unused)
733 {
734 const char *class = sysevent_get_class_name(ev);
735 const char *subclass = sysevent_get_subclass_name(ev);
736 nvlist_t *nvl;
737 int ret;
738 boolean_t is_lofi = B_FALSE, is_check = B_FALSE;
739 boolean_t is_dle = B_FALSE, is_update = B_FALSE;
740
741 if (strcmp(class, EC_DEV_ADD) == 0) {
742 /*
743 * We're mainly interested in disk additions, but we also listen
744 * for new lofi devices, to allow for simplified testing.
745 */
746 if (strcmp(subclass, ESC_DISK) == 0)
747 is_lofi = B_FALSE;
748 else if (strcmp(subclass, ESC_LOFI) == 0)
749 is_lofi = B_TRUE;
750 else
751 return (0);
752
753 is_check = B_FALSE;
754 } else if (strcmp(class, EC_ZFS) == 0) {
755 if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
756 /*
757 * This event signifies that a device failed to open
758 * during pool load, but the 'autoreplace' property was
759 * set, so we should pretend it's just been added.
760 */
761 is_check = B_TRUE;
762 } else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
763 (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
764 (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
765 (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
766 (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
767 (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
768 /*
769 * When we receive these events we check the pool
770 * configuration and update the vdev FRUs if necessary.
771 */
772 is_update = B_TRUE;
773 }
774 } else if (strcmp(class, EC_DEV_STATUS) == 0 &&
775 strcmp(subclass, ESC_DEV_DLE) == 0) {
776 is_dle = B_TRUE;
777 } else {
778 return (0);
779 }
780
781 if (sysevent_get_attr_list(ev, &nvl) != 0)
782 return (-1);
783
784 if (is_dle)
785 ret = zfs_deliver_dle(nvl);
786 else if (is_update)
787 ret = zfs_deliver_update(nvl);
788 else if (is_check)
789 ret = zfs_deliver_check(nvl);
790 else
791 ret = zfs_deliver_add(nvl, is_lofi);
792
793 nvlist_free(nvl);
794 return (ret);
795 }
796
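/*
 * Enumerate all pools once at module load, queueing any whose top-level
 * vdev is unavailable so their datasets can be enabled when the pool
 * recovers; intended to run in its own thread (g_zfs_tid).
 */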
797 /*ARGSUSED*/
798 void *
799 zfs_enum_pools(void *arg)
800 {
801 (void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
802 if (!list_is_empty(&g_pool_list))
803 g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
804 0, NULL);
805 g_enumeration_done = B_TRUE;
806 return (NULL);
807 }
808
809 static struct slm_mod_ops zfs_mod_ops = {
810 SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
811 };
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright 2017 Nexenta Systems, Inc.
26 */
27
28 /*
29 * ZFS syseventd module.
30 *
31 * The purpose of this module is to process ZFS-related events.
32 *
33 * EC_DEV_ADD
34 * ESC_DISK Search for associated vdevs matching devid, physpath,
35 * or FRU, and appropriately online or replace the device.
36 *
37 * EC_DEV_STATUS
38 * ESC_DEV_DLE Device capacity dynamically changed. Process the change
39 * according to the 'autoexpand' property.
40 *
41 * EC_ZFS
42 * ESC_ZFS_VDEV_CHECK This event indicates that a device failed to open during
43 * pool load, but the autoreplace property was set. In
44 * this case the associated FMA fault was deferred until
45 * the module had a chance to process the autoreplace
46 * logic. If the device could not be replaced, then the
47 * second online attempt will trigger the FMA fault that
48 * was skipped earlier.
49 * ESC_ZFS_VDEV_ADD
50 * ESC_ZFS_VDEV_ATTACH
51 * ESC_ZFS_VDEV_CLEAR
52 * ESC_ZFS_VDEV_ONLINE
53 * ESC_ZFS_POOL_CREATE
54 * ESC_ZFS_POOL_IMPORT All of the above events will trigger the update of
55 * FRU for all associated devices.
56 */
57
58 #include <alloca.h>
59 #include <devid.h>
60 #include <fcntl.h>
61 #include <libnvpair.h>
62 #include <libsysevent.h>
63 #include <libzfs.h>
64 #include <limits.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <sys/list.h>
68 #include <sys/sunddi.h>
69 #include <sys/fs/zfs.h>
70 #include <sys/sysevent/eventdefs.h>
71 #include <sys/sysevent/dev.h>
72 #include <thread_pool.h>
73 #include <unistd.h>
74 #include "syseventd.h"
75
76 #if defined(__i386) || defined(__amd64)
77 #define WD_MINOR ":q"
78 #elif defined(__sparc)
79 #define WD_MINOR ":c"
80 #else
81 #error Unknown architecture
82 #endif
83
84 #define DEVICE_PREFIX "/devices"
85
86 typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, const char *);
87
88 libzfs_handle_t *g_zfshdl;
89 list_t g_pool_list;
90 tpool_t *g_tpool;
91 boolean_t g_enumeration_done;
92 thread_t g_zfs_tid;
93
94 typedef struct unavailpool {
95 zpool_handle_t *uap_zhp;
96 list_node_t uap_node;
97 } unavailpool_t;
98
99 int
100 zfs_toplevel_state(zpool_handle_t *zhp)
101 {
102 nvlist_t *nvroot;
103 vdev_stat_t *vs;
104 unsigned int c;
105
106 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
107 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
108 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
109 (uint64_t **)&vs, &c) == 0);
110 return (vs->vs_state);
111 }
112
113 static int
114 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
115 {
116 if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
117 unavailpool_t *uap;
118 uap = malloc(sizeof (unavailpool_t));
119 uap->uap_zhp = zhp;
120 list_insert_tail((list_t *)data, uap);
121 } else {
122 zpool_close(zhp);
123 }
124 return (0);
125 }
126
127 /*
128 * The device associated with the given vdev (matched by devid, physical path,
129 * or FRU) has been added to the system.
130 */
131 static void
132 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, const char *newrawpath)
133 {
134 vdev_state_t newstate;
135 nvlist_t *nvroot = NULL, *newvd = NULL;
136 uint64_t wholedisk = 0ULL;
137 uint64_t offline = 0ULL;
138 boolean_t avail_spare, l2cache;
139 const char *zc_type = ZPOOL_CONFIG_CHILDREN;
140 char *devpath; /* current /dev path */
141 char *physpath; /* current /devices node */
142 char fullpath[PATH_MAX]; /* current /dev path without slice */
143 char fullphyspath[PATH_MAX]; /* full /devices phys path */
144 char newdevpath[PATH_MAX]; /* new /dev path */
145 char newphyspath[PATH_MAX]; /* new /devices node */
146 char diskname[PATH_MAX]; /* disk device without /dev and slice */
147 const char *adevid = NULL; /* devid to attach */
148 const char *adevpath; /* /dev path to attach */
149 const char *aphyspath = NULL; /* /devices node to attach */
150 zpool_boot_label_t boot_type;
151 uint64_t boot_size;
152
153 if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &devpath) != 0)
154 return;
155 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
156 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
157 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
158
159 /* Do nothing if vdev is explicitly marked offline */
160 if (offline)
161 return;
162
163 (void) strlcpy(fullpath, devpath, sizeof (fullpath));
164 /* Chop off slice for whole disks */
165 if (wholedisk)
166 fullpath[strlen(fullpath) - 2] = '\0';
167
168 /*
169 * Device could still have valid label, so first attempt to online the
170 * device undoing any spare operation. If online succeeds and new state
171 * is either HEALTHY or DEGRADED, we are done.
172 */
173 if (zpool_vdev_online(zhp, fullpath,
174 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
175 (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED))
176 return;
177
178 /*
179 * If the pool doesn't have the autoreplace property set or this is a
180 * non-whole disk vdev, there's nothing else we can do so attempt a true
181 * online (without the unspare flag), which will trigger a FMA fault.
182 */
183 if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) == 0 ||
184 !wholedisk) {
185 (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
186 &newstate);
187 return;
188 }
189
190 /*
191 * Attempt to replace the device.
192 *
193 * If newrawpath is set (not NULL), then we matched by FRU and need to
194 * use new /dev and /devices paths for attach.
195 *
196 * First, construct the short disk name to label, chopping off any
197 * leading /dev path and slice (which newrawpath doesn't include).
198 */
199 if (newrawpath != NULL) {
200 (void) strlcpy(diskname, newrawpath +
201 strlen(ZFS_RDISK_ROOTD), sizeof (diskname));
202 } else {
203 (void) strlcpy(diskname, fullpath +
204 strlen(ZFS_DISK_ROOTD), sizeof (diskname));
205 }
206
207 /* Write out the label */
208 if (zpool_is_bootable(zhp))
209 boot_type = ZPOOL_COPY_BOOT_LABEL;
210 else
211 boot_type = ZPOOL_NO_BOOT_LABEL;
212
213 boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
214 if (zpool_label_disk(g_zfshdl, zhp, diskname, boot_type, boot_size,
215 NULL) != 0) {
216 syseventd_print(9, "%s: failed to write the label\n", __func__);
217 return;
218 }
219
220 /* Define "path" and "physpath" to be used for attach */
221 if (newrawpath != NULL) {
222 /* Construct newdevpath from newrawpath */
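/* The data slice is s1 when a dedicated boot slice was created */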
223 (void) snprintf(newdevpath, sizeof (newdevpath), "%s%s%s",
224 ZFS_DISK_ROOTD, newrawpath + strlen(ZFS_RDISK_ROOTD),
225 (boot_size > 0) ? "s1" : "s0");
226 /* Use replacing vdev's "path" and "physpath" */
227 adevpath = newdevpath;
229 /* Resolve /dev path to /devices node; bail out if it fails */
230 if (realpath(newdevpath, newphyspath) == NULL)
231 goto fail;
232 aphyspath = newphyspath + strlen(DEVICE_PREFIX);
231 } else {
232 /* Use original vdev's "path" and "physpath" */
233 adevpath = devpath;
234 aphyspath = physpath;
235 }
236
237 /* Construct new devid */
238 (void) snprintf(fullphyspath, sizeof (fullphyspath), "%s%s",
239 DEVICE_PREFIX, aphyspath);
240 adevid = devid_str_from_path(fullphyspath);
241
242 /*
243 * Check if replaced vdev is "available" (not swapped in) spare
244 * or l2cache device.
245 */
246 (void) zpool_find_vdev(zhp, fullpath, &avail_spare, &l2cache, NULL,
247 NULL);
248 if (avail_spare)
249 zc_type = ZPOOL_CONFIG_SPARES;
250 else if (l2cache)
251 zc_type = ZPOOL_CONFIG_L2CACHE;
252
253 /* Construct the root vdev */
254 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 ||
255 nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0)
256 goto fail;
257
258 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
259 (adevid != NULL &&
260 nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, adevid) != 0) ||
261 nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, adevpath) != 0 ||
262 (aphyspath != NULL &&
263 nvlist_add_string(newvd, ZPOOL_CONFIG_PHYS_PATH, aphyspath) != 0) ||
264 nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
265 nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
266 nvlist_add_nvlist_array(nvroot, zc_type, &newvd, 1) != 0)
267 goto fail;
268
269 if (avail_spare || l2cache) {
270 /*
271 * For spares/l2cache, we need to explicitly remove the device
272 * and add the new one.
273 */
274 (void) zpool_vdev_remove(zhp, fullpath);
275 (void) zpool_add(zhp, nvroot);
276 } else {
277 /* Do the replace for regular vdevs */
278 (void) zpool_vdev_attach(zhp, fullpath, adevpath, nvroot,
279 B_TRUE);
280 }
281
282 fail:
283 if (adevid != NULL)
284 devid_str_free((char *)adevid);
285 nvlist_free(newvd);
286 nvlist_free(nvroot);
287 }
288
289 /*
290 * Utility functions to find a vdev matching given criteria.
291 */
292 typedef struct dev_data {
293 const char *dd_compare;
294 const char *dd_prop;
295 const char *dd_devpath;
296 zfs_process_func_t dd_func;
297 int (*dd_cmp_func)(libzfs_handle_t *, const char *,
298 const char *, size_t);
299 boolean_t dd_found;
300 uint64_t dd_pool_guid;
301 uint64_t dd_vdev_guid;
302 } dev_data_t;
303
304 static void
305 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
306 {
307 dev_data_t *dp = data;
308 boolean_t nested = B_FALSE;
309 char *cmp_str;
310 nvlist_t **cnvl, **snvl, **lnvl;
311 uint_t i, nc, ns, nl;
312 uint64_t guid;
313
314 /* Iterate over child vdevs */
315 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
316 &cnvl, &nc) == 0) {
317 for (i = 0; i < nc; i++)
318 zfs_iter_vdev(zhp, cnvl[i], data);
319 nested = B_TRUE;
320 }
321 /* Iterate over spare vdevs */
322 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
323 &snvl, &ns) == 0) {
324 for (i = 0; i < ns; i++)
325 zfs_iter_vdev(zhp, snvl[i], data);
326 nested = B_TRUE;
327 }
328 /* Iterate over l2cache vdevs */
329 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
330 &lnvl, &nl) == 0) {
331 for (i = 0; i < nl; i++)
332 zfs_iter_vdev(zhp, lnvl[i], data);
333 nested = B_TRUE;
334 }
335
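/* Interior vdevs only recurse; matching is done on leaf vdevs */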
336 if (nested)
337 return;
338
339 if (dp->dd_vdev_guid != 0 && (nvlist_lookup_uint64(nvl,
340 ZPOOL_CONFIG_GUID, &guid) != 0 || guid != dp->dd_vdev_guid))
341 return;
342
343 if (dp->dd_compare != NULL && (nvlist_lookup_string(nvl, dp->dd_prop,
344 &cmp_str) != 0 || dp->dd_cmp_func(g_zfshdl, dp->dd_compare, cmp_str,
345 strlen(dp->dd_compare)) != 0))
346 return;
347
348 dp->dd_found = B_TRUE;
349 (dp->dd_func)(zhp, nvl, dp->dd_devpath);
350 }
351
352 void
353 zfs_enable_ds(void *arg)
354 {
355 unavailpool_t *pool = (unavailpool_t *)arg;
356
357 (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
358 zpool_close(pool->uap_zhp);
359 free(pool);
360 }
361
362 static int
363 zfs_iter_pool(zpool_handle_t *zhp, void *data)
364 {
365 nvlist_t *config, *nvl;
366 dev_data_t *dp = data;
367 uint64_t pool_guid;
368 unavailpool_t *pool;
369
380 for (pool = list_head(&g_pool_list); pool != NULL;
381 pool = list_next(&g_pool_list, pool)) {
382
383 if (strcmp(zpool_get_name(zhp),
384 zpool_get_name(pool->uap_zhp)))
385 continue;
386 if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
387 list_remove(&g_pool_list, pool);
388 (void) tpool_dispatch(g_tpool, zfs_enable_ds,
389 pool);
390 break;
391 }
392 }
393 }
394
395 zpool_close(zhp);
396 return (0);
397 }
398
399 /*
400 * Wrap strncmp() to be used as comparison function for devid_iter() and
401 * physpath_iter().
402 */
403 /* ARGSUSED */
404 static int
405 strncmp_wrap(libzfs_handle_t *hdl, const char *a, const char *b, size_t len)
406 {
407 return (strncmp(a, b, len));
408 }
409
410 /*
411 * Given a physical device path, iterate over all (pool, vdev) pairs which
412 * correspond to the given path's FRU.
413 */
414 static boolean_t
415 devfru_iter(const char *devpath, const char *physpath, zfs_process_func_t func)
416 {
417 dev_data_t data = { 0 };
418 const char *fru;
419
420 /*
421 * Need to refresh the fru cache otherwise we won't find the newly
422 * inserted disk.
423 */
424 libzfs_fru_refresh(g_zfshdl);
425
426 fru = libzfs_fru_lookup(g_zfshdl, physpath);
427 if (fru == NULL)
428 return (B_FALSE);
429
430 data.dd_compare = fru;
431 data.dd_func = func;
432 data.dd_cmp_func = libzfs_fru_cmp_slot;
433 data.dd_prop = ZPOOL_CONFIG_FRU;
434 data.dd_found = B_FALSE;
435 data.dd_devpath = devpath;
436
437 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
438
439 return (data.dd_found);
440 }
441
442 /*
443 * Given a physical device path, iterate over all (pool, vdev) pairs which
444 * correspond to the given path.
445 */
446 /*ARGSUSED*/
447 static boolean_t
448 physpath_iter(const char *devpath, const char *physpath,
449 zfs_process_func_t func)
450 {
451 dev_data_t data = { 0 };
452
453 data.dd_compare = physpath;
454 data.dd_func = func;
455 data.dd_cmp_func = strncmp_wrap;
456 data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
457 data.dd_found = B_FALSE;
458 data.dd_devpath = NULL;
459
460 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
461
462 return (data.dd_found);
463 }
464
465 /*
466 * Given a devid, iterate over all (pool, vdev) pairs which correspond to the
467 * given vdev.
468 */
469 /*ARGSUSED*/
470 static boolean_t
471 devid_iter(const char *devpath, const char *physpath, zfs_process_func_t func)
472 {
473 char fullphyspath[PATH_MAX];
474 char *devidstr;
475 char *s;
476 dev_data_t data = { 0 };
477
478 /* Try to open a known minor node */
479 (void) snprintf(fullphyspath, sizeof (fullphyspath), "%s%s%s",
480 DEVICE_PREFIX, physpath, WD_MINOR);
481
482 devidstr = devid_str_from_path(fullphyspath);
483 if (devidstr == NULL)
484 return (B_FALSE);
485 /* Chop off the minor node */
486 if ((s = strrchr(devidstr, '/')) != NULL)
487 *(s + 1) = '\0';
488
489 data.dd_compare = devidstr;
490 data.dd_func = func;
491 data.dd_cmp_func = strncmp_wrap;
492 data.dd_prop = ZPOOL_CONFIG_DEVID;
493 data.dd_found = B_FALSE;
494 data.dd_devpath = NULL;
495
496 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
497
498 devid_str_free(devidstr);
499
500 return (data.dd_found);
501 }
502
503 /*
504 * This function is called when we receive a devfs add event.
505 */
506 static int
507 zfs_deliver_add(nvlist_t *nvl)
508 {
509 char *devpath, *physpath;
510
511 if (nvlist_lookup_string(nvl, DEV_NAME, &devpath) != 0 ||
512 nvlist_lookup_string(nvl, DEV_PHYS_PATH, &physpath) != 0)
513 return (-1);
514
515 /*
516 * Iterate over all vdevs with a matching devid, then those with a
517 * matching /devices path, and finally those with a matching FRU slot
518 * number, only paying attention to vdevs marked as whole disks.
519 */
520 if (!devid_iter(devpath, physpath, zfs_process_add) &&
521 !physpath_iter(devpath, physpath, zfs_process_add) &&
522 !devfru_iter(devpath, physpath, zfs_process_add)) {
523 syseventd_print(9, "%s: match failed devpath=%s physpath=%s\n",
524 __func__, devpath, physpath);
525 }
526
527 return (0);
528 }
529
530 /*
531 * Called when we receive a VDEV_CHECK event, which indicates a device could not
532 * be opened during initial pool open, but the autoreplace property was set on
533 * the pool. In this case, we treat it as if it were an add event.
534 */
535 static int
536 zfs_deliver_check(nvlist_t *nvl)
537 {
538 dev_data_t data = { 0 };
539
540 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
541 &data.dd_pool_guid) != 0 ||
542 nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
543 &data.dd_vdev_guid) != 0 ||
544 data.dd_vdev_guid == 0)
545 return (0);
546
547 data.dd_func = zfs_process_add;
548
549 (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
550
551 return (0);
552 }
553
554 static int
555 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
556 {
557 char *devname = data;
558 boolean_t avail_spare, l2cache;
559 vdev_state_t newstate;
560 nvlist_t *tgt;
561
562 syseventd_print(9, "%s: searching for %s in pool %s\n", __func__,
563 devname, zpool_get_name(zhp));
564
565 if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
566 &avail_spare, &l2cache, NULL)) != NULL) {
567 char *path, fullpath[MAXPATHLEN];
568 uint64_t wholedisk = 0ULL;
569
570 verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
571 &path) == 0);
572 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
573 &wholedisk) == 0);
574
575 (void) strlcpy(fullpath, path, sizeof (fullpath));
576 if (wholedisk) {
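/* Chop off the slice to get the whole-disk device path */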
577 fullpath[strlen(fullpath) - 2] = '\0';
578
579 /*
580 * We need to reopen the pool associated with this
581 * device so that the kernel can update the size
582 * of the expanded device.
583 */
584 (void) zpool_reopen(zhp);
585 }
586
587 if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
588 syseventd_print(9, "%s: setting device '%s' to ONLINE "
589 "state in pool %s\n", __func__, fullpath,
590 zpool_get_name(zhp));
591 if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
592 (void) zpool_vdev_online(zhp, fullpath, 0,
593 &newstate);
594 }
595 zpool_close(zhp);
596 return (1);
597 }
598 zpool_close(zhp);
599 return (0);
600 }
601
602 /*
603 * This function is called for each vdev of a pool for which any of the
604 * following events was received:
605 * - ESC_ZFS_vdev_add
606 * - ESC_ZFS_vdev_attach
607 * - ESC_ZFS_vdev_clear
608 * - ESC_ZFS_vdev_online
609 * - ESC_ZFS_pool_create
610 * - ESC_ZFS_pool_import
611 * It will update the vdevs FRU property if it is out of date.
612 */
613 /*ARGSUSED*/
614 static void
615 zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, const char *devpath)
616 {
617 char *physpath, *cptr, *oldfru = NULL;
618 const char *newfru;
619 uint64_t vdev_guid;
620
621 (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
622 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
623 (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);
624
625 /* Remove :<slice> from physpath */
626 cptr = strrchr(physpath, ':');
627 if (cptr != NULL)
628 *cptr = '\0';
629
630 newfru = libzfs_fru_lookup(g_zfshdl, physpath);
631 if (newfru == NULL) {
632 syseventd_print(9, "%s: physpath=%s newFRU=<none>\n", __func__,
633 physpath);
634 return;
635 }
636
637 /* Do nothing if the FRU hasn't changed */
638 if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
639 syseventd_print(9, "%s: physpath=%s newFRU=<unchanged>\n",
640 __func__, physpath);
641 return;
642 }
643
644 syseventd_print(9, "%s: physpath=%s newFRU=%s\n", __func__, physpath,
645 newfru);
646
647 (void) zpool_fru_set(zhp, vdev_guid, newfru);
648 }
649
650 /*
651 * This function handles the following events:
652 * - ESC_ZFS_vdev_add
653 * - ESC_ZFS_vdev_attach
654 * - ESC_ZFS_vdev_clear
655 * - ESC_ZFS_vdev_online
656 * - ESC_ZFS_pool_create
657 * - ESC_ZFS_pool_import
658 * It will iterate over the pool vdevs to update the FRU property.
659 */
660 int
661 zfs_deliver_update(nvlist_t *nvl)
662 {
663 dev_data_t dd = { 0 };
664 char *pname;
665 zpool_handle_t *zhp;
666 nvlist_t *config, *vdev;
667
668 if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
669 syseventd_print(9, "%s: no pool name\n", __func__);
670 return (-1);
671 }
672
673 /*
674 * If this event was triggered by a pool export or destroy we cannot
675 * open the pool. This is not an error, just return 0 as we don't care
676 * about these events.
677 */
678 zhp = zpool_open_canfail(g_zfshdl, pname);
679 if (zhp == NULL)
680 return (0);
681
682 config = zpool_get_config(zhp, NULL);
683 if (config == NULL) {
684 syseventd_print(9, "%s: failed to get pool config for %s\n",
685 __func__, pname);
686 zpool_close(zhp);
687 return (-1);
688 }
689
690 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
691 syseventd_print(0, "%s: failed to get vdev tree for %s\n",
692 __func__, pname);
693 zpool_close(zhp);
694 return (-1);
695 }
696
697 libzfs_fru_refresh(g_zfshdl);
698
699 dd.dd_func = zfs_update_vdev_fru;
700 zfs_iter_vdev(zhp, vdev, &dd);
701
702 zpool_close(zhp);
703 return (0);
704 }
705
706 int
707 zfs_deliver_dle(nvlist_t *nvl)
708 {
709 char *physpath;
710
711 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &physpath) != 0) {
712 syseventd_print(9, "%s: no physpath\n", __func__);
713 return (-1);
714 }
715 if (strncmp(physpath, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
716 syseventd_print(9, "%s: invalid device '%s'\n", __func__,
717 physpath);
718 return (-1);
719 }
720
721 /*
722 * We try to find the device using the physical
723 * path that has been supplied. We need to strip off
724 * the /devices prefix before starting our search.
725 */
726 physpath += strlen(DEVICE_PREFIX);
727 if (zpool_iter(g_zfshdl, zfsdle_vdev_online, physpath) != 1) {
728 syseventd_print(9, "%s: device '%s' not found\n",
729 __func__, physpath);
730 return (1);
731 }
732 return (0);
733 }
734
735
736 /*ARGSUSED*/
737 static int
738 zfs_deliver_event(sysevent_t *ev, int unused)
739 {
740 const char *class = sysevent_get_class_name(ev);
741 const char *subclass = sysevent_get_subclass_name(ev);
742 nvlist_t *nvl;
743 int ret;
744 boolean_t is_check = B_FALSE;
745 boolean_t is_dle = B_FALSE;
746 boolean_t is_update = B_FALSE;
747
748 if (strcmp(class, EC_DEV_ADD) == 0) {
749 /* We're only interested in disk additions */
750 if (strcmp(subclass, ESC_DISK) != 0)
751 return (0);
752 } else if (strcmp(class, EC_ZFS) == 0) {
753 if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
754 /*
755 * This event signifies that a device failed to open
756 * during pool load, but the 'autoreplace' property was
757 * set, so we should pretend it's just been added.
758 */
759 is_check = B_TRUE;
760 } else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
761 (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
762 (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
763 (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
764 (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
765 (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
766 /*
767 * When we receive these events we check the pool
768 * configuration and update the vdev FRUs if necessary.
769 */
770 is_update = B_TRUE;
771 }
772 } else if (strcmp(class, EC_DEV_STATUS) == 0 &&
773 strcmp(subclass, ESC_DEV_DLE) == 0) {
774 is_dle = B_TRUE;
775 } else {
776 return (0);
777 }
778
779 if (sysevent_get_attr_list(ev, &nvl) != 0)
780 return (-1);
781
782 if (is_dle)
783 ret = zfs_deliver_dle(nvl);
784 else if (is_update)
785 ret = zfs_deliver_update(nvl);
786 else if (is_check)
787 ret = zfs_deliver_check(nvl);
788 else
789 ret = zfs_deliver_add(nvl);
790
791 nvlist_free(nvl);
792 return (ret);
793 }
794
795 /*ARGSUSED*/
796 void *
797 zfs_enum_pools(void *arg)
798 {
799 (void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
800 if (!list_is_empty(&g_pool_list))
801 g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
802 0, NULL);
803 g_enumeration_done = B_TRUE;
804 return (NULL);
805 }
806
807 static struct slm_mod_ops zfs_mod_ops = {
808 SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
809 };
|