1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2017 Nexenta Systems, Inc.
  26  */
  27 
  28 #include <dlfcn.h>
  29 #include <errno.h>
  30 #include <libintl.h>
  31 #include <link.h>
  32 #include <pthread.h>
  33 #include <strings.h>
  34 #include <unistd.h>
  35 
  36 #include <libzfs.h>
  37 
  38 #include <fm/libtopo.h>
  39 #include <fm/topo_hc.h>
  40 #include <sys/fm/protocol.h>
  41 #include <sys/systeminfo.h>
  42 
  43 #include "libzfs_impl.h"
  44 
  45 /*
 * This file is responsible for determining the relationship between I/O
 * device paths and physical locations.  In the world of MPxIO and external
  48  * enclosures, the device path is not synonymous with the physical location.
  49  * If you remove a drive and insert it into a different slot, it will end up
  50  * with the same path under MPxIO.  If you recable storage enclosures, the
  51  * device paths may change.  All of this makes it difficult to implement the
  52  * 'autoreplace' property, which is supposed to automatically manage disk
  53  * replacement based on physical slot.
  54  *
  55  * In order to work around these limitations, we have a per-vdev FRU property
  56  * that is the libtopo path (minus disk-specific authority information) to the
  57  * physical location of the device on the system.  This is an optional
  58  * property, and is only needed when using the 'autoreplace' property or when
  59  * generating FMA faults against vdevs.
  60  */
  61 
/*
 * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
 * it is not present.  We only need this once per library instance, so it is
 * not part of the libzfs handle.
 *
 * _topo_dlhandle is the dlopen() handle; it is NULL when libtopo could not be
 * loaded or when any of the symbols below could not be resolved.  Each
 * _topo_<name> pointer is bound by libzfs_init_fru() to the libtopo symbol
 * "topo_<name>".
 */
static void *_topo_dlhandle;
static topo_hdl_t *(*_topo_open)(int, const char *, int *);
static void (*_topo_close)(topo_hdl_t *);
static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
static void (*_topo_snap_release)(topo_hdl_t *);
static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
    topo_walk_cb_t, void *, int *);
static int (*_topo_walk_step)(topo_walk_t *, int);
static void (*_topo_walk_fini)(topo_walk_t *);
static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
static char *(*_topo_node_name)(tnode_t *);
static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
    char **, int *);
static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
static int (*_topo_fmri_str2nvl)(topo_hdl_t *, const char *, nvlist_t **,
    int *);
static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
    const char *);
  86 
  87 #define ZFS_FRU_HASH_SIZE       257
  88 
  89 static size_t
  90 fru_strhash(const char *key)
  91 {
  92         ulong_t g, h = 0;
  93         const char *p;
  94 
  95         for (p = key; *p != '\0'; p++) {
  96                 h = (h << 4) + *p;
  97 
  98                 if ((g = (h & 0xf0000000)) != 0) {
  99                         h ^= (g >> 24);
 100                         h ^= g;
 101                 }
 102         }
 103 
 104         return (h % ZFS_FRU_HASH_SIZE);
 105 }
 106 
 107 static int
 108 libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
 109 {
 110         libzfs_handle_t *hdl = arg;
 111         nvlist_t *fru;
 112         char *devpath, *frustr;
 113         int err;
 114         libzfs_fru_t *frup;
 115         size_t idx;
 116 
 117         /*
 118          * If this is the chassis node, and we don't yet have the system
 119          * chassis ID, then fill in this value now.
 120          */
 121         if (hdl->libzfs_chassis_id[0] == '\0' &&
 122             strcmp(_topo_node_name(tn), "chassis") == 0) {
 123                 if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
 124                     FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
 125                         (void) strlcpy(hdl->libzfs_chassis_id, devpath,
 126                             sizeof (hdl->libzfs_chassis_id));
 127         }
 128 
 129         /*
 130          * Skip non-disk nodes.
 131          */
 132         if (strcmp(_topo_node_name(tn), "disk") != 0)
 133                 return (TOPO_WALK_NEXT);
 134 
 135         /*
 136          * Get the devfs path and FRU.
 137          */
 138         if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
 139                 return (TOPO_WALK_NEXT);
 140 
 141         if (libzfs_fru_lookup(hdl, devpath) != NULL) {
 142                 _topo_hdl_strfree(thp, devpath);
 143                 return (TOPO_WALK_NEXT);
 144         }
 145 
 146         if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
 147                 _topo_hdl_strfree(thp, devpath);
 148                 return (TOPO_WALK_NEXT);
 149         }
 150 
 151         /*
 152          * Convert the FRU into a string.
 153          */
 154         if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
 155                 nvlist_free(fru);
 156                 _topo_hdl_strfree(thp, devpath);
 157                 return (TOPO_WALK_NEXT);
 158         }
 159 
 160         nvlist_free(fru);
 161 
 162         /*
 163          * Finally, we have a FRU string and device path.  Add it to the hash.
 164          */
 165         if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
 166                 _topo_hdl_strfree(thp, devpath);
 167                 _topo_hdl_strfree(thp, frustr);
 168                 return (TOPO_WALK_NEXT);
 169         }
 170 
 171         if ((frup->zf_device = strdup(devpath)) == NULL ||
 172             (frup->zf_fru = strdup(frustr)) == NULL) {
 173                 free(frup->zf_device);
 174                 free(frup);
 175                 _topo_hdl_strfree(thp, devpath);
 176                 _topo_hdl_strfree(thp, frustr);
 177                 return (TOPO_WALK_NEXT);
 178         }
 179 
 180         _topo_hdl_strfree(thp, devpath);
 181         _topo_hdl_strfree(thp, frustr);
 182 
 183         idx = fru_strhash(frup->zf_device);
 184         frup->zf_chain = hdl->libzfs_fru_hash[idx];
 185         hdl->libzfs_fru_hash[idx] = frup;
 186         frup->zf_next = hdl->libzfs_fru_list;
 187         hdl->libzfs_fru_list = frup;
 188 
 189         return (TOPO_WALK_NEXT);
 190 }
 191 
 192 /*
 193  * Given a disk FRU, check that FRU contains a slot number and remove FRU
 194  * details that aren't needed when comparing FRUs by slot number.
 195  */
 196 static char *
 197 diskfru_to_slot(libzfs_handle_t *hdl, const char *diskfru)
 198 {
 199         nvlist_t *nvl, **hc;
 200         char *hc_name, *tmp = NULL;
 201         int ret, i;
 202         uint_t hc_cnt;
 203 
 204         /* string -> nvlist */
 205         if (_topo_fmri_str2nvl(hdl->libzfs_topo_hdl, diskfru, &nvl, &ret) != 0)
 206                 return (NULL);
 207 
 208         /* Need slot (bay) number in the FRU */
 209         if (nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hc,
 210             &hc_cnt) != 0)
 211                 goto out;
 212 
 213         for (i = 0; i < hc_cnt; i++) {
 214                 if (nvlist_lookup_string(hc[i], FM_FMRI_HC_NAME,
 215                     &hc_name) == 0 && strcmp(hc_name, BAY) == 0)
 216                         break;
 217         }
 218         if (i == hc_cnt)
 219                 goto out;
 220 
 221         /* Drop the unwanted components */
 222         (void) nvlist_remove_all(nvl, FM_FMRI_HC_SERIAL_ID);
 223         (void) nvlist_remove_all(nvl, FM_FMRI_HC_PART);
 224         (void) nvlist_remove_all(nvl, FM_FMRI_HC_REVISION);
 225 
 226         /* nvlist -> string */
 227         if (_topo_fmri_nvl2str(hdl->libzfs_topo_hdl, nvl, &tmp, &ret) != 0)
 228                 tmp = NULL;
 229 out:
 230         nvlist_free(nvl);
 231         return (tmp);
 232 }
 233 
 234 /*
 235  * Check if given FRUs match by slot number to skip comparing disk specific
 236  * fields of the FRU.
 237  */
 238 /* ARGSUSED */
 239 int
 240 libzfs_fru_cmp_slot(libzfs_handle_t *hdl, const char *a, const char *b,
 241     size_t len)
 242 {
 243         char *slota, *slotb;
 244         int ret = -1;
 245 
 246         if (a == NULL || b == NULL)
 247                 return (-1);
 248 
 249         slota = diskfru_to_slot(hdl, a);
 250         slotb = diskfru_to_slot(hdl, b);
 251 
 252         if (slota != NULL && slotb != NULL)
 253                 ret = strcmp(slota, slotb);
 254 
 255         _topo_hdl_strfree(hdl->libzfs_topo_hdl, slota);
 256         _topo_hdl_strfree(hdl->libzfs_topo_hdl, slotb);
 257 
 258         return (ret);
 259 }
 260 
 261 /*
 262  * Called during initialization to setup the dynamic libtopo connection.
 263  */
 264 #pragma init(libzfs_init_fru)
 265 static void
 266 libzfs_init_fru(void)
 267 {
 268         char path[MAXPATHLEN];
 269         char isa[257];
 270 
 271 #if defined(_LP64)
 272         if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
 273                 isa[0] = '\0';
 274 #else
 275         isa[0] = '\0';
 276 #endif
 277         (void) snprintf(path, sizeof (path),
 278             "/usr/lib/fm/%s/libtopo.so", isa);
 279 
 280         if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
 281                 return;
 282 
 283         _topo_open = (topo_hdl_t *(*)())
 284             dlsym(_topo_dlhandle, "topo_open");
 285         _topo_close = (void (*)())
 286             dlsym(_topo_dlhandle, "topo_close");
 287         _topo_snap_hold = (char *(*)())
 288             dlsym(_topo_dlhandle, "topo_snap_hold");
 289         _topo_snap_release = (void (*)())
 290             dlsym(_topo_dlhandle, "topo_snap_release");
 291         _topo_walk_init = (topo_walk_t *(*)())
 292             dlsym(_topo_dlhandle, "topo_walk_init");
 293         _topo_walk_step = (int (*)())
 294             dlsym(_topo_dlhandle, "topo_walk_step");
 295         _topo_walk_fini = (void (*)())
 296             dlsym(_topo_dlhandle, "topo_walk_fini");
 297         _topo_hdl_strfree = (void (*)())
 298             dlsym(_topo_dlhandle, "topo_hdl_strfree");
 299         _topo_node_name = (char *(*)())
 300             dlsym(_topo_dlhandle, "topo_node_name");
 301         _topo_prop_get_string = (int (*)())
 302             dlsym(_topo_dlhandle, "topo_prop_get_string");
 303         _topo_node_fru = (int (*)())
 304             dlsym(_topo_dlhandle, "topo_node_fru");
 305         _topo_fmri_nvl2str = (int (*)())
 306             dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
 307         _topo_fmri_str2nvl = (int (*)())
 308             dlsym(_topo_dlhandle, "topo_fmri_str2nvl");
 309         _topo_fmri_strcmp_noauth = (int (*)())
 310             dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
 311 
 312         if (_topo_open == NULL || _topo_close == NULL ||
 313             _topo_snap_hold == NULL || _topo_snap_release == NULL ||
 314             _topo_walk_init == NULL || _topo_walk_step == NULL ||
 315             _topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
 316             _topo_node_name == NULL || _topo_prop_get_string == NULL ||
 317             _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
 318             _topo_fmri_str2nvl == NULL || _topo_fmri_strcmp_noauth == NULL) {
 319                 (void) dlclose(_topo_dlhandle);
 320                 _topo_dlhandle = NULL;
 321         }
 322 }
 323 
 324 /*
 325  * Refresh the mappings from device path -> FMRI.  We do this by walking the
 326  * hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
 327  * Note that we strip out the disk-specific authority information (serial,
 328  * part, revision, etc) so that we are left with only the identifying
 329  * characteristics of the slot (hc path and chassis-id).
 330  */
 331 void
 332 libzfs_fru_refresh(libzfs_handle_t *hdl)
 333 {
 334         int err;
 335         char *uuid;
 336         topo_hdl_t *thp;
 337         topo_walk_t *twp;
 338 
 339         if (_topo_dlhandle == NULL)
 340                 return;
 341 
 342         /*
 343          * Clear the FRU hash and initialize our basic structures.
 344          */
 345         libzfs_fru_clear(hdl, B_FALSE);
 346 
 347         if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
 348             NULL, &err)) == NULL)
 349                 return;
 350 
 351         thp = hdl->libzfs_topo_hdl;
 352 
 353         if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
 354                 return;
 355 
 356         _topo_hdl_strfree(thp, uuid);
 357 
 358         if (hdl->libzfs_fru_hash == NULL &&
 359             (hdl->libzfs_fru_hash =
 360             calloc(ZFS_FRU_HASH_SIZE, sizeof (void *))) == NULL)
 361                 return;
 362 
 363         /*
 364          * We now have a topo snapshot, so iterate over the hc topology looking
 365          * for disks to add to the hash.
 366          */
 367         twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
 368             libzfs_fru_gather, hdl, &err);
 369         if (twp != NULL) {
 370                 int status;
 371 
 372                 status = _topo_walk_step(twp, TOPO_WALK_CHILD);
 373                 assert(status != TOPO_WALK_NEXT);
 374                 _topo_walk_fini(twp);
 375         }
 376 }
 377 
 378 /*
 379  * Given a devfs path, return the FRU for the device, if known.  This will
 380  * automatically call libzfs_fru_refresh() if it hasn't already been called by
 381  * the consumer.  The string returned is valid until the next call to
 382  * libzfs_fru_refresh().
 383  */
 384 const char *
 385 libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
 386 {
 387         size_t idx = fru_strhash(devpath);
 388         libzfs_fru_t *frup;
 389 
 390         if (hdl->libzfs_fru_hash == NULL)
 391                 libzfs_fru_refresh(hdl);
 392 
 393         if (hdl->libzfs_fru_hash == NULL)
 394                 return (NULL);
 395 
 396         for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
 397             frup = frup->zf_chain) {
 398                 if (strcmp(devpath, frup->zf_device) == 0)
 399                         return (frup->zf_fru);
 400         }
 401 
 402         return (NULL);
 403 }
 404 
 405 /*
 406  * Given a fru path, return the device path.  This will automatically call
 407  * libzfs_fru_refresh() if it hasn't already been called by the consumer.  The
 408  * string returned is valid until the next call to libzfs_fru_refresh().
 409  */
 410 const char *
 411 libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
 412 {
 413         libzfs_fru_t *frup;
 414         size_t idx;
 415 
 416         if (hdl->libzfs_fru_hash == NULL)
 417                 libzfs_fru_refresh(hdl);
 418 
 419         if (hdl->libzfs_fru_hash == NULL)
 420                 return (NULL);
 421 
 422         for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
 423                 for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
 424                     frup = frup->zf_next) {
 425                         if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
 426                             fru, frup->zf_fru))
 427                                 return (frup->zf_device);
 428                 }
 429         }
 430 
 431         return (NULL);
 432 }
 433 
 434 /*
 435  * Change the stored FRU for the given vdev.
 436  */
 437 int
 438 zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
 439 {
 440         zfs_cmd_t zc = { 0 };
 441 
 442         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 443         (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
 444         zc.zc_guid = vdev_guid;
 445 
 446         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
 447                 return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
 448                     dgettext(TEXT_DOMAIN, "cannot set FRU")));
 449 
 450         return (0);
 451 }
 452 
 453 /*
 454  * Compare to two FRUs, ignoring any authority information.
 455  */
 456 boolean_t
 457 libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
 458 {
 459         if (hdl->libzfs_fru_hash == NULL)
 460                 libzfs_fru_refresh(hdl);
 461 
 462         if (hdl->libzfs_fru_hash == NULL)
 463                 return (strcmp(a, b) == 0);
 464 
 465         return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
 466 }
 467 
 468 /*
 469  * This special function checks to see whether the FRU indicates it's supposed
 470  * to be in the system chassis, but the chassis-id doesn't match.  This can
 471  * happen in a clustered case, where both head nodes have the same logical
 472  * disk, but opening the device on the other head node is meaningless.
 473  */
 474 boolean_t
 475 libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
 476 {
 477         const char *chassisid;
 478         size_t len;
 479 
 480         if (hdl->libzfs_fru_hash == NULL)
 481                 libzfs_fru_refresh(hdl);
 482 
 483         if (hdl->libzfs_chassis_id[0] == '\0')
 484                 return (B_FALSE);
 485 
 486         if (strstr(fru, "/chassis=0/") == NULL)
 487                 return (B_FALSE);
 488 
 489         if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
 490                 return (B_FALSE);
 491 
 492         chassisid += 12;
 493         len = strlen(hdl->libzfs_chassis_id);
 494         if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
 495             (chassisid[len] == '/' || chassisid[len] == ':'))
 496                 return (B_FALSE);
 497 
 498         return (B_TRUE);
 499 }
 500 
 501 /*
 502  * Check if both FRUs belong to the same enclosure.
 503  */
 504 boolean_t
 505 libzfs_fru_cmp_enclosure(const char *fru_a, const char *fru_b)
 506 {
 507         int a, b;
 508         char *encl_a, *encl_b;
 509         const char *encl_str = "/ses-enclosure=";
 510         size_t encl_str_len = strlen(encl_str);
 511 
 512         encl_a = strstr(fru_a, encl_str);
 513         encl_b = strstr(fru_b, encl_str);
 514         /* If both FRUs don't contain enclosure field, consider it a match */
 515         if (encl_a == NULL && encl_b == NULL)
 516                 return (B_TRUE);
 517         /* If one FRU has the enclosure field, but the other one doesn't */
 518         if (encl_a == NULL || encl_b == NULL)
 519                 return (B_FALSE);
 520 
 521         encl_a += encl_str_len;
 522         encl_b += encl_str_len;
 523         if (sscanf(encl_a, "%d", &a) != 1 || sscanf(encl_b, "%d", &b) != 1)
 524                 return (B_FALSE);
 525 
 526         return (a == b);
 527 }
 528 
 529 /*
 530  * Clear memory associated with the FRU hash.
 531  */
 532 void
 533 libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
 534 {
 535         libzfs_fru_t *frup;
 536 
 537         while ((frup = hdl->libzfs_fru_list) != NULL) {
 538                 hdl->libzfs_fru_list = frup->zf_next;
 539                 free(frup->zf_device);
 540                 free(frup->zf_fru);
 541                 free(frup);
 542         }
 543 
 544         hdl->libzfs_fru_list = NULL;
 545 
 546         if (hdl->libzfs_topo_hdl != NULL) {
 547                 _topo_snap_release(hdl->libzfs_topo_hdl);
 548                 _topo_close(hdl->libzfs_topo_hdl);
 549                 hdl->libzfs_topo_hdl = NULL;
 550         }
 551 
 552         if (final) {
 553                 free(hdl->libzfs_fru_hash);
 554         } else if (hdl->libzfs_fru_hash != NULL) {
 555                 bzero(hdl->libzfs_fru_hash,
 556                     ZFS_FRU_HASH_SIZE * sizeof (void *));
 557         }
 558 }