1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012 by Delphix. All rights reserved.
  24  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 #include <libzfs.h>
  28 
  29 #include <sys/zfs_context.h>
  30 
  31 #include <errno.h>
  32 #include <fcntl.h>
  33 #include <stdarg.h>
  34 #include <stddef.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <strings.h>
  38 #include <sys/file.h>
  39 #include <sys/mntent.h>
  40 #include <sys/mnttab.h>
  41 #include <sys/param.h>
  42 #include <sys/stat.h>
  43 
  44 #include <sys/dmu.h>
  45 #include <sys/dmu_objset.h>
  46 #include <sys/dnode.h>
  47 #include <sys/vdev_impl.h>
  48 
  49 #include <sys/mkdev.h>
  50 
  51 #include "zinject.h"
  52 
  53 extern void kernel_init(int);
  54 extern void kernel_fini(void);
  55 
  56 static int debug;
  57 
  58 static void
  59 ziprintf(const char *fmt, ...)
  60 {
  61         va_list ap;
  62 
  63         if (!debug)
  64                 return;
  65 
  66         va_start(ap, fmt);
  67         (void) vprintf(fmt, ap);
  68         va_end(ap);
  69 }
  70 
  71 static void
  72 compress_slashes(const char *src, char *dest)
  73 {
  74         while (*src != '\0') {
  75                 *dest = *src++;
  76                 while (*dest == '/' && *src == '/')
  77                         ++src;
  78                 ++dest;
  79         }
  80         *dest = '\0';
  81 }
  82 
  83 /*
  84  * Given a full path to a file, translate into a dataset name and a relative
  85  * path within the dataset.  'dataset' must be at least MAXNAMELEN characters,
  86  * and 'relpath' must be at least MAXPATHLEN characters.  We also pass a stat64
  87  * buffer, which we need later to get the object ID.
  88  */
  89 static int
  90 parse_pathname(const char *inpath, char *dataset, char *relpath,
  91     struct stat64 *statbuf)
  92 {
  93         struct extmnttab mp;
  94         FILE *fp;
  95         int match;
  96         const char *rel;
  97         char fullpath[MAXPATHLEN];
  98 
  99         compress_slashes(inpath, fullpath);
 100 
 101         if (fullpath[0] != '/') {
 102                 (void) fprintf(stderr, "invalid object '%s': must be full "
 103                     "path\n", fullpath);
 104                 usage();
 105                 return (-1);
 106         }
 107 
 108         if (strlen(fullpath) >= MAXPATHLEN) {
 109                 (void) fprintf(stderr, "invalid object; pathname too long\n");
 110                 return (-1);
 111         }
 112 
 113         if (stat64(fullpath, statbuf) != 0) {
 114                 (void) fprintf(stderr, "cannot open '%s': %s\n",
 115                     fullpath, strerror(errno));
 116                 return (-1);
 117         }
 118 
 119         if ((fp = fopen(MNTTAB, "r")) == NULL) {
 120                 (void) fprintf(stderr, "cannot open /etc/mnttab\n");
 121                 return (-1);
 122         }
 123 
 124         match = 0;
 125         while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
 126                 if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
 127                         match = 1;
 128                         break;
 129                 }
 130         }
 131 
 132         if (!match) {
 133                 (void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
 134                     fullpath);
 135                 return (-1);
 136         }
 137 
 138         if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) {
 139                 (void) fprintf(stderr, "invalid path '%s': not a ZFS "
 140                     "filesystem\n", fullpath);
 141                 return (-1);
 142         }
 143 
 144         if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) {
 145                 (void) fprintf(stderr, "invalid path '%s': mountpoint "
 146                     "doesn't match path\n", fullpath);
 147                 return (-1);
 148         }
 149 
 150         (void) strcpy(dataset, mp.mnt_special);
 151 
 152         rel = fullpath + strlen(mp.mnt_mountp);
 153         if (rel[0] == '/')
 154                 rel++;
 155         (void) strcpy(relpath, rel);
 156 
 157         return (0);
 158 }
 159 
 160 /*
 161  * Convert from a (dataset, path) pair into a (objset, object) pair.  Note that
 162  * we grab the object number from the inode number, since looking this up via
 163  * libzpool is a real pain.
 164  */
 165 /* ARGSUSED */
 166 static int
 167 object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
 168     zinject_record_t *record)
 169 {
 170         objset_t *os;
 171         int err;
 172 
 173         /*
 174          * Before doing any libzpool operations, call sync() to ensure that the
 175          * on-disk state is consistent with the in-core state.
 176          */
 177         sync();
 178 
 179         err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
 180         if (err != 0) {
 181                 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 182                     dataset, strerror(err));
 183                 return (-1);
 184         }
 185 
 186         record->zi_objset = dmu_objset_id(os);
 187         record->zi_object = statbuf->st_ino;
 188 
 189         dmu_objset_disown(os, FTAG);
 190 
 191         return (0);
 192 }
 193 
 194 /*
 195  * Calculate the real range based on the type, level, and range given.
 196  */
 197 static int
 198 calculate_range(const char *dataset, err_type_t type, int level, char *range,
 199     zinject_record_t *record)
 200 {
 201         objset_t *os = NULL;
 202         dnode_t *dn = NULL;
 203         int err;
 204         int ret = -1;
 205 
 206         /*
 207          * Determine the numeric range from the string.
 208          */
 209         if (range == NULL) {
 210                 /*
 211                  * If range is unspecified, set the range to [0,-1], which
 212                  * indicates that the whole object should be treated as an
 213                  * error.
 214                  */
 215                 record->zi_start = 0;
 216                 record->zi_end = -1ULL;
 217         } else {
 218                 char *end;
 219 
 220                 /* XXX add support for suffixes */
 221                 record->zi_start = strtoull(range, &end, 10);
 222 
 223 
 224                 if (*end == '\0')
 225                         record->zi_end = record->zi_start + 1;
 226                 else if (*end == ',')
 227                         record->zi_end = strtoull(end + 1, &end, 10);
 228 
 229                 if (*end != '\0') {
 230                         (void) fprintf(stderr, "invalid range '%s': must be "
 231                             "a numeric range of the form 'start[,end]'\n",
 232                             range);
 233                         goto out;
 234                 }
 235         }
 236 
 237         switch (type) {
 238         case TYPE_DATA:
 239                 break;
 240 
 241         case TYPE_DNODE:
 242                 /*
 243                  * If this is a request to inject faults into the dnode, then we
 244                  * must translate the current (objset,object) pair into an
 245                  * offset within the metadnode for the objset.  Specifying any
 246                  * kind of range with type 'dnode' is illegal.
 247                  */
 248                 if (range != NULL) {
 249                         (void) fprintf(stderr, "range cannot be specified when "
 250                             "type is 'dnode'\n");
 251                         goto out;
 252                 }
 253 
 254                 record->zi_start = record->zi_object * sizeof (dnode_phys_t);
 255                 record->zi_end = record->zi_start + sizeof (dnode_phys_t);
 256                 record->zi_object = 0;
 257                 break;
 258         }
 259 
 260         /*
 261          * Get the dnode associated with object, so we can calculate the block
 262          * size.
 263          */
 264         if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
 265             B_TRUE, FTAG, &os)) != 0) {
 266                 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 267                     dataset, strerror(err));
 268                 goto out;
 269         }
 270 
 271         if (record->zi_object == 0) {
 272                 dn = DMU_META_DNODE(os);
 273         } else {
 274                 err = dnode_hold(os, record->zi_object, FTAG, &dn);
 275                 if (err != 0) {
 276                         (void) fprintf(stderr, "failed to hold dnode "
 277                             "for object %llu\n",
 278                             (u_longlong_t)record->zi_object);
 279                         goto out;
 280                 }
 281         }
 282 
 283 
 284         ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
 285         ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
 286 
 287         /*
 288          * Translate range into block IDs.
 289          */
 290         if (record->zi_start != 0 || record->zi_end != -1ULL) {
 291                 record->zi_start >>= dn->dn_datablkshift;
 292                 record->zi_end >>= dn->dn_datablkshift;
 293         }
 294 
 295         /*
 296          * Check level, and then translate level 0 blkids into ranges
 297          * appropriate for level of indirection.
 298          */
 299         record->zi_level = level;
 300         if (level > 0) {
 301                 ziprintf("level 0 blkid range: [%llu, %llu]\n",
 302                     record->zi_start, record->zi_end);
 303 
 304                 if (level >= dn->dn_nlevels) {
 305                         (void) fprintf(stderr, "level %d exceeds max level "
 306                             "of object (%d)\n", level, dn->dn_nlevels - 1);
 307                         goto out;
 308                 }
 309 
 310                 if (record->zi_start != 0 || record->zi_end != 0) {
 311                         int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 312 
 313                         for (; level > 0; level--) {
 314                                 record->zi_start >>= shift;
 315                                 record->zi_end >>= shift;
 316                         }
 317                 }
 318         }
 319 
 320         ret = 0;
 321 out:
 322         if (dn) {
 323                 if (dn != DMU_META_DNODE(os))
 324                         dnode_rele(dn, FTAG);
 325         }
 326         if (os)
 327                 dmu_objset_disown(os, FTAG);
 328 
 329         return (ret);
 330 }
 331 
 332 int
 333 translate_record(err_type_t type, const char *object, const char *range,
 334     int level, zinject_record_t *record, char *poolname, char *dataset)
 335 {
 336         char path[MAXPATHLEN];
 337         char *slash;
 338         struct stat64 statbuf;
 339         int ret = -1;
 340 
 341         kernel_init(FREAD);
 342 
 343         debug = (getenv("ZINJECT_DEBUG") != NULL);
 344 
 345         ziprintf("translating: %s\n", object);
 346 
 347         if (MOS_TYPE(type)) {
 348                 /*
 349                  * MOS objects are treated specially.
 350                  */
 351                 switch (type) {
 352                 case TYPE_MOS:
 353                         record->zi_type = 0;
 354                         break;
 355                 case TYPE_MOSDIR:
 356                         record->zi_type = DMU_OT_OBJECT_DIRECTORY;
 357                         break;
 358                 case TYPE_METASLAB:
 359                         record->zi_type = DMU_OT_OBJECT_ARRAY;
 360                         break;
 361                 case TYPE_CONFIG:
 362                         record->zi_type = DMU_OT_PACKED_NVLIST;
 363                         break;
 364                 case TYPE_BPOBJ:
 365                         record->zi_type = DMU_OT_BPOBJ;
 366                         break;
 367                 case TYPE_SPACEMAP:
 368                         record->zi_type = DMU_OT_SPACE_MAP;
 369                         break;
 370                 case TYPE_ERRLOG:
 371                         record->zi_type = DMU_OT_ERROR_LOG;
 372                         break;
 373                 }
 374 
 375                 dataset[0] = '\0';
 376                 (void) strcpy(poolname, object);
 377                 return (0);
 378         }
 379 
 380         /*
 381          * Convert a full path into a (dataset, file) pair.
 382          */
 383         if (parse_pathname(object, dataset, path, &statbuf) != 0)
 384                 goto err;
 385 
 386         ziprintf("   dataset: %s\n", dataset);
 387         ziprintf("      path: %s\n", path);
 388 
 389         /*
 390          * Convert (dataset, file) into (objset, object)
 391          */
 392         if (object_from_path(dataset, path, &statbuf, record) != 0)
 393                 goto err;
 394 
 395         ziprintf("raw objset: %llu\n", record->zi_objset);
 396         ziprintf("raw object: %llu\n", record->zi_object);
 397 
 398         /*
 399          * For the given object, calculate the real (type, level, range)
 400          */
 401         if (calculate_range(dataset, type, level, (char *)range, record) != 0)
 402                 goto err;
 403 
 404         ziprintf("    objset: %llu\n", record->zi_objset);
 405         ziprintf("    object: %llu\n", record->zi_object);
 406         if (record->zi_start == 0 &&
 407             record->zi_end == -1ULL)
 408                 ziprintf("     range: all\n");
 409         else
 410                 ziprintf("     range: [%llu, %llu]\n", record->zi_start,
 411                     record->zi_end);
 412 
 413         /*
 414          * Copy the pool name
 415          */
 416         (void) strcpy(poolname, dataset);
 417         if ((slash = strchr(poolname, '/')) != NULL)
 418                 *slash = '\0';
 419 
 420         ret = 0;
 421 
 422 err:
 423         kernel_fini();
 424         return (ret);
 425 }
 426 
 427 int
 428 translate_raw(const char *str, zinject_record_t *record)
 429 {
 430         /*
 431          * A raw bookmark of the form objset:object:level:blkid, where each
 432          * number is a hexidecimal value.
 433          */
 434         if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
 435             (u_longlong_t *)&record->zi_object, &record->zi_level,
 436             (u_longlong_t *)&record->zi_start) != 4) {
 437                 (void) fprintf(stderr, "bad raw spec '%s': must be of the form "
 438                     "'objset:object:level:blkid'\n", str);
 439                 return (-1);
 440         }
 441 
 442         record->zi_end = record->zi_start;
 443 
 444         return (0);
 445 }
 446 
 447 int
 448 translate_device(const char *pool, const char *device, err_type_t label_type,
 449     zinject_record_t *record)
 450 {
 451         char *end;
 452         zpool_handle_t *zhp;
 453         nvlist_t *tgt;
 454         boolean_t isspare, iscache;
 455 
 456         /*
 457          * Given a device name or GUID, create an appropriate injection record
 458          * with zi_guid set.
 459          */
 460         if ((zhp = zpool_open(g_zfs, pool)) == NULL)
 461                 return (-1);
 462 
 463         record->zi_guid = strtoull(device, &end, 16);
 464         if (record->zi_guid == 0 || *end != '\0') {
 465                 tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL,
 466                     NULL);
 467 
 468                 if (tgt == NULL) {
 469                         (void) fprintf(stderr, "cannot find device '%s' in "
 470                             "pool '%s'\n", device, pool);
 471                         return (-1);
 472                 }
 473 
 474                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
 475                     &record->zi_guid) == 0);
 476         }
 477 
 478         /*
 479          * Device faults can take on three different forms:
 480          * 1). delayed or hanging I/O
 481          * 2). zfs label faults
 482          * 3). generic disk faults
 483          */
 484         if (record->zi_timer != 0) {
 485                 record->zi_cmd = ZINJECT_DELAY_IO;
 486         } else if (label_type != TYPE_INVAL) {
 487                 record->zi_cmd = ZINJECT_LABEL_FAULT;
 488         } else {
 489                 record->zi_cmd = ZINJECT_DEVICE_FAULT;
 490         }
 491 
 492         switch (label_type) {
 493         case TYPE_LABEL_UBERBLOCK:
 494                 record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
 495                 record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
 496                 break;
 497         case TYPE_LABEL_NVLIST:
 498                 record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
 499                 record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
 500                 break;
 501         case TYPE_LABEL_PAD1:
 502                 record->zi_start = offsetof(vdev_label_t, vl_pad1);
 503                 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 504                 break;
 505         case TYPE_LABEL_PAD2:
 506                 record->zi_start = offsetof(vdev_label_t, vl_pad2);
 507                 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 508                 break;
 509         }
 510         return (0);
 511 }