1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2017 Nexenta Systems, Inc.
26 */
27
28 #include <dlfcn.h>
29 #include <errno.h>
30 #include <libintl.h>
31 #include <link.h>
32 #include <pthread.h>
33 #include <strings.h>
34 #include <unistd.h>
35
36 #include <libzfs.h>
37
38 #include <fm/libtopo.h>
39 #include <fm/topo_hc.h>
40 #include <sys/fm/protocol.h>
41 #include <sys/systeminfo.h>
42
43 #include "libzfs_impl.h"
44
45 /*
 * This file is responsible for determining the relationship between I/O
 * device paths and physical locations. In the world of MPxIO and external
48 * enclosures, the device path is not synonymous with the physical location.
49 * If you remove a drive and insert it into a different slot, it will end up
50 * with the same path under MPxIO. If you recable storage enclosures, the
51 * device paths may change. All of this makes it difficult to implement the
52 * 'autoreplace' property, which is supposed to automatically manage disk
53 * replacement based on physical slot.
54 *
55 * In order to work around these limitations, we have a per-vdev FRU property
56 * that is the libtopo path (minus disk-specific authority information) to the
57 * physical location of the device on the system. This is an optional
58 * property, and is only needed when using the 'autoreplace' property or when
59 * generating FMA faults against vdevs.
60 */
61
62 /*
63 * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
64 * it is not present. We only need this once per library instance, so it is
65 * not part of the libzfs handle.
66 */
67 static void *_topo_dlhandle;
68 static topo_hdl_t *(*_topo_open)(int, const char *, int *);
69 static void (*_topo_close)(topo_hdl_t *);
70 static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
71 static void (*_topo_snap_release)(topo_hdl_t *);
72 static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
73 topo_walk_cb_t, void *, int *);
74 static int (*_topo_walk_step)(topo_walk_t *, int);
75 static void (*_topo_walk_fini)(topo_walk_t *);
76 static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
77 static char *(*_topo_node_name)(tnode_t *);
78 static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
79 char **, int *);
80 static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
81 static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
82 static int (*_topo_fmri_str2nvl)(topo_hdl_t *, const char *, nvlist_t **,
83 int *);
84 static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
85 const char *);
86
#define	ZFS_FRU_HASH_SIZE	257

/*
 * Map a NUL-terminated string onto a bucket index in the range
 * [0, ZFS_FRU_HASH_SIZE).  Each byte is folded into a running value with a
 * four-bit shift; whenever any of bits 28-31 end up set, they are mixed back
 * into the lower bits and then cleared.
 */
static size_t
fru_strhash(const char *key)
{
	ulong_t hash = 0;

	while (*key != '\0') {
		ulong_t top;

		hash = (hash << 4) + *key++;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash % ZFS_FRU_HASH_SIZE);
}
106
107 static int
108 libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
109 {
110 libzfs_handle_t *hdl = arg;
111 nvlist_t *fru;
112 char *devpath, *frustr;
113 int err;
114 libzfs_fru_t *frup;
115 size_t idx;
116
117 /*
118 * If this is the chassis node, and we don't yet have the system
119 * chassis ID, then fill in this value now.
120 */
121 if (hdl->libzfs_chassis_id[0] == '\0' &&
122 strcmp(_topo_node_name(tn), "chassis") == 0) {
123 if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
124 FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
125 (void) strlcpy(hdl->libzfs_chassis_id, devpath,
126 sizeof (hdl->libzfs_chassis_id));
127 }
128
129 /*
130 * Skip non-disk nodes.
131 */
132 if (strcmp(_topo_node_name(tn), "disk") != 0)
133 return (TOPO_WALK_NEXT);
134
135 /*
136 * Get the devfs path and FRU.
137 */
138 if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
139 return (TOPO_WALK_NEXT);
140
141 if (libzfs_fru_lookup(hdl, devpath) != NULL) {
142 _topo_hdl_strfree(thp, devpath);
143 return (TOPO_WALK_NEXT);
144 }
145
146 if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
147 _topo_hdl_strfree(thp, devpath);
148 return (TOPO_WALK_NEXT);
149 }
150
151 /*
152 * Convert the FRU into a string.
153 */
154 if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
155 nvlist_free(fru);
156 _topo_hdl_strfree(thp, devpath);
157 return (TOPO_WALK_NEXT);
158 }
159
160 nvlist_free(fru);
161
162 /*
163 * Finally, we have a FRU string and device path. Add it to the hash.
164 */
165 if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
166 _topo_hdl_strfree(thp, devpath);
167 _topo_hdl_strfree(thp, frustr);
168 return (TOPO_WALK_NEXT);
169 }
170
171 if ((frup->zf_device = strdup(devpath)) == NULL ||
172 (frup->zf_fru = strdup(frustr)) == NULL) {
173 free(frup->zf_device);
174 free(frup);
175 _topo_hdl_strfree(thp, devpath);
176 _topo_hdl_strfree(thp, frustr);
177 return (TOPO_WALK_NEXT);
178 }
179
180 _topo_hdl_strfree(thp, devpath);
181 _topo_hdl_strfree(thp, frustr);
182
183 idx = fru_strhash(frup->zf_device);
184 frup->zf_chain = hdl->libzfs_fru_hash[idx];
185 hdl->libzfs_fru_hash[idx] = frup;
186 frup->zf_next = hdl->libzfs_fru_list;
187 hdl->libzfs_fru_list = frup;
188
189 return (TOPO_WALK_NEXT);
190 }
191
192 /*
193 * Given a disk FRU, check that FRU contains a slot number and remove FRU
194 * details that aren't needed when comparing FRUs by slot number.
195 */
196 static char *
197 diskfru_to_slot(libzfs_handle_t *hdl, const char *diskfru)
198 {
199 nvlist_t *nvl, **hc;
200 char *hc_name, *tmp = NULL;
201 int ret, i;
202 uint_t hc_cnt;
203
204 /* string -> nvlist */
205 if (_topo_fmri_str2nvl(hdl->libzfs_topo_hdl, diskfru, &nvl, &ret) != 0)
206 return (NULL);
207
208 /* Need slot (bay) number in the FRU */
209 if (nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hc,
210 &hc_cnt) != 0)
211 goto out;
212
213 for (i = 0; i < hc_cnt; i++) {
214 if (nvlist_lookup_string(hc[i], FM_FMRI_HC_NAME,
215 &hc_name) == 0 && strcmp(hc_name, BAY) == 0)
216 break;
217 }
218 if (i == hc_cnt)
219 goto out;
220
221 /* Drop the unwanted components */
222 (void) nvlist_remove_all(nvl, FM_FMRI_HC_SERIAL_ID);
223 (void) nvlist_remove_all(nvl, FM_FMRI_HC_PART);
224 (void) nvlist_remove_all(nvl, FM_FMRI_HC_REVISION);
225
226 /* nvlist -> string */
227 if (_topo_fmri_nvl2str(hdl->libzfs_topo_hdl, nvl, &tmp, &ret) != 0)
228 tmp = NULL;
229 out:
230 nvlist_free(nvl);
231 return (tmp);
232 }
233
234 /*
235 * Check if given FRUs match by slot number to skip comparing disk specific
236 * fields of the FRU.
237 */
238 /* ARGSUSED */
239 int
240 libzfs_fru_cmp_slot(libzfs_handle_t *hdl, const char *a, const char *b,
241 size_t len)
242 {
243 char *slota, *slotb;
244 int ret = -1;
245
246 if (a == NULL || b == NULL)
247 return (-1);
248
249 slota = diskfru_to_slot(hdl, a);
250 slotb = diskfru_to_slot(hdl, b);
251
252 if (slota != NULL && slotb != NULL)
253 ret = strcmp(slota, slotb);
254
255 _topo_hdl_strfree(hdl->libzfs_topo_hdl, slota);
256 _topo_hdl_strfree(hdl->libzfs_topo_hdl, slotb);
257
258 return (ret);
259 }
260
261 /*
262 * Called during initialization to setup the dynamic libtopo connection.
263 */
264 #pragma init(libzfs_init_fru)
265 static void
266 libzfs_init_fru(void)
267 {
268 char path[MAXPATHLEN];
269 char isa[257];
270
271 #if defined(_LP64)
272 if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
273 isa[0] = '\0';
274 #else
275 isa[0] = '\0';
276 #endif
277 (void) snprintf(path, sizeof (path),
278 "/usr/lib/fm/%s/libtopo.so", isa);
279
280 if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
281 return;
282
283 _topo_open = (topo_hdl_t *(*)())
284 dlsym(_topo_dlhandle, "topo_open");
285 _topo_close = (void (*)())
286 dlsym(_topo_dlhandle, "topo_close");
287 _topo_snap_hold = (char *(*)())
288 dlsym(_topo_dlhandle, "topo_snap_hold");
289 _topo_snap_release = (void (*)())
290 dlsym(_topo_dlhandle, "topo_snap_release");
291 _topo_walk_init = (topo_walk_t *(*)())
292 dlsym(_topo_dlhandle, "topo_walk_init");
293 _topo_walk_step = (int (*)())
294 dlsym(_topo_dlhandle, "topo_walk_step");
295 _topo_walk_fini = (void (*)())
296 dlsym(_topo_dlhandle, "topo_walk_fini");
297 _topo_hdl_strfree = (void (*)())
298 dlsym(_topo_dlhandle, "topo_hdl_strfree");
299 _topo_node_name = (char *(*)())
300 dlsym(_topo_dlhandle, "topo_node_name");
301 _topo_prop_get_string = (int (*)())
302 dlsym(_topo_dlhandle, "topo_prop_get_string");
303 _topo_node_fru = (int (*)())
304 dlsym(_topo_dlhandle, "topo_node_fru");
305 _topo_fmri_nvl2str = (int (*)())
306 dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
307 _topo_fmri_str2nvl = (int (*)())
308 dlsym(_topo_dlhandle, "topo_fmri_str2nvl");
309 _topo_fmri_strcmp_noauth = (int (*)())
310 dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
311
312 if (_topo_open == NULL || _topo_close == NULL ||
313 _topo_snap_hold == NULL || _topo_snap_release == NULL ||
314 _topo_walk_init == NULL || _topo_walk_step == NULL ||
315 _topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
316 _topo_node_name == NULL || _topo_prop_get_string == NULL ||
317 _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
318 _topo_fmri_str2nvl == NULL || _topo_fmri_strcmp_noauth == NULL) {
319 (void) dlclose(_topo_dlhandle);
320 _topo_dlhandle = NULL;
321 }
322 }
323
324 /*
325 * Refresh the mappings from device path -> FMRI. We do this by walking the
326 * hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
327 * Note that we strip out the disk-specific authority information (serial,
328 * part, revision, etc) so that we are left with only the identifying
329 * characteristics of the slot (hc path and chassis-id).
330 */
331 void
332 libzfs_fru_refresh(libzfs_handle_t *hdl)
333 {
334 int err;
335 char *uuid;
336 topo_hdl_t *thp;
337 topo_walk_t *twp;
338
339 if (_topo_dlhandle == NULL)
340 return;
341
342 /*
343 * Clear the FRU hash and initialize our basic structures.
344 */
345 libzfs_fru_clear(hdl, B_FALSE);
346
347 if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
348 NULL, &err)) == NULL)
349 return;
350
351 thp = hdl->libzfs_topo_hdl;
352
353 if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
354 return;
355
356 _topo_hdl_strfree(thp, uuid);
357
358 if (hdl->libzfs_fru_hash == NULL &&
359 (hdl->libzfs_fru_hash =
360 calloc(ZFS_FRU_HASH_SIZE, sizeof (void *))) == NULL)
361 return;
362
363 /*
364 * We now have a topo snapshot, so iterate over the hc topology looking
365 * for disks to add to the hash.
366 */
367 twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
368 libzfs_fru_gather, hdl, &err);
369 if (twp != NULL) {
370 int status;
371
372 status = _topo_walk_step(twp, TOPO_WALK_CHILD);
373 assert(status != TOPO_WALK_NEXT);
374 _topo_walk_fini(twp);
375 }
376 }
377
378 /*
379 * Given a devfs path, return the FRU for the device, if known. This will
380 * automatically call libzfs_fru_refresh() if it hasn't already been called by
381 * the consumer. The string returned is valid until the next call to
382 * libzfs_fru_refresh().
383 */
384 const char *
385 libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
386 {
387 size_t idx = fru_strhash(devpath);
388 libzfs_fru_t *frup;
389
390 if (hdl->libzfs_fru_hash == NULL)
391 libzfs_fru_refresh(hdl);
392
393 if (hdl->libzfs_fru_hash == NULL)
394 return (NULL);
395
396 for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
397 frup = frup->zf_chain) {
398 if (strcmp(devpath, frup->zf_device) == 0)
399 return (frup->zf_fru);
400 }
401
402 return (NULL);
403 }
404
405 /*
406 * Given a fru path, return the device path. This will automatically call
407 * libzfs_fru_refresh() if it hasn't already been called by the consumer. The
408 * string returned is valid until the next call to libzfs_fru_refresh().
409 */
410 const char *
411 libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
412 {
413 libzfs_fru_t *frup;
414 size_t idx;
415
416 if (hdl->libzfs_fru_hash == NULL)
417 libzfs_fru_refresh(hdl);
418
419 if (hdl->libzfs_fru_hash == NULL)
420 return (NULL);
421
422 for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
423 for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
424 frup = frup->zf_next) {
425 if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
426 fru, frup->zf_fru))
427 return (frup->zf_device);
428 }
429 }
430
431 return (NULL);
432 }
433
434 /*
435 * Change the stored FRU for the given vdev.
436 */
437 int
438 zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
439 {
440 zfs_cmd_t zc = { 0 };
441
442 (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
443 (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
444 zc.zc_guid = vdev_guid;
445
446 if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
447 return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
448 dgettext(TEXT_DOMAIN, "cannot set FRU")));
449
450 return (0);
451 }
452
453 /*
454 * Compare to two FRUs, ignoring any authority information.
455 */
456 boolean_t
457 libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
458 {
459 if (hdl->libzfs_fru_hash == NULL)
460 libzfs_fru_refresh(hdl);
461
462 if (hdl->libzfs_fru_hash == NULL)
463 return (strcmp(a, b) == 0);
464
465 return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
466 }
467
468 /*
469 * This special function checks to see whether the FRU indicates it's supposed
470 * to be in the system chassis, but the chassis-id doesn't match. This can
471 * happen in a clustered case, where both head nodes have the same logical
472 * disk, but opening the device on the other head node is meaningless.
473 */
474 boolean_t
475 libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
476 {
477 const char *chassisid;
478 size_t len;
479
480 if (hdl->libzfs_fru_hash == NULL)
481 libzfs_fru_refresh(hdl);
482
483 if (hdl->libzfs_chassis_id[0] == '\0')
484 return (B_FALSE);
485
486 if (strstr(fru, "/chassis=0/") == NULL)
487 return (B_FALSE);
488
489 if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
490 return (B_FALSE);
491
492 chassisid += 12;
493 len = strlen(hdl->libzfs_chassis_id);
494 if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
495 (chassisid[len] == '/' || chassisid[len] == ':'))
496 return (B_FALSE);
497
498 return (B_TRUE);
499 }
500
/*
 * Check if both FRUs belong to the same enclosure, judged by the integer
 * that follows the first "/ses-enclosure=" in each string.
 *
 * Returns B_TRUE when neither FRU has an enclosure field or when both fields
 * parse to the same number; B_FALSE when only one FRU has the field, when
 * either number cannot be parsed, or when the numbers differ.
 */
boolean_t
libzfs_fru_cmp_enclosure(const char *fru_a, const char *fru_b)
{
	static const char tag[] = "/ses-enclosure=";
	const size_t taglen = sizeof (tag) - 1;
	const char *pos_a = strstr(fru_a, tag);
	const char *pos_b = strstr(fru_b, tag);
	int num_a, num_b;

	/*
	 * With at least one field missing, the FRUs match only if both are
	 * missing, i.e. both pointers are NULL.
	 */
	if (pos_a == NULL || pos_b == NULL)
		return (pos_a == pos_b ? B_TRUE : B_FALSE);

	if (sscanf(pos_a + taglen, "%d", &num_a) != 1)
		return (B_FALSE);
	if (sscanf(pos_b + taglen, "%d", &num_b) != 1)
		return (B_FALSE);

	return (num_a == num_b ? B_TRUE : B_FALSE);
}
528
529 /*
530 * Clear memory associated with the FRU hash.
531 */
532 void
533 libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
534 {
535 libzfs_fru_t *frup;
536
537 while ((frup = hdl->libzfs_fru_list) != NULL) {
538 hdl->libzfs_fru_list = frup->zf_next;
539 free(frup->zf_device);
540 free(frup->zf_fru);
541 free(frup);
542 }
543
544 hdl->libzfs_fru_list = NULL;
545
546 if (hdl->libzfs_topo_hdl != NULL) {
547 _topo_snap_release(hdl->libzfs_topo_hdl);
548 _topo_close(hdl->libzfs_topo_hdl);
549 hdl->libzfs_topo_hdl = NULL;
550 }
551
552 if (final) {
553 free(hdl->libzfs_fru_hash);
554 } else if (hdl->libzfs_fru_hash != NULL) {
555 bzero(hdl->libzfs_fru_hash,
556 ZFS_FRU_HASH_SIZE * sizeof (void *));
557 }
558 }