1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <libzfs.h>
  27 
  28 #include <sys/zfs_context.h>
  29 
  30 #include <errno.h>
  31 #include <fcntl.h>
  32 #include <stdarg.h>
  33 #include <stddef.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <strings.h>
  37 #include <sys/file.h>
  38 #include <sys/mntent.h>
  39 #include <sys/mnttab.h>
  40 #include <sys/param.h>
  41 #include <sys/stat.h>
  42 
  43 #include <sys/dmu.h>
  44 #include <sys/dmu_objset.h>
  45 #include <sys/dnode.h>
  46 #include <sys/vdev_impl.h>
  47 
  48 #include <sys/mkdev.h>
  49 
  50 #include "zinject.h"
  51 
  52 extern void kernel_init(int);
  53 extern void kernel_fini(void);
  54 
  55 static int debug;
  56 
  57 static void
  58 ziprintf(const char *fmt, ...)
  59 {
  60         va_list ap;
  61 
  62         if (!debug)
  63                 return;
  64 
  65         va_start(ap, fmt);
  66         (void) vprintf(fmt, ap);
  67         va_end(ap);
  68 }
  69 
  70 static void
  71 compress_slashes(const char *src, char *dest)
  72 {
  73         while (*src != '\0') {
  74                 *dest = *src++;
  75                 while (*dest == '/' && *src == '/')
  76                         ++src;
  77                 ++dest;
  78         }
  79         *dest = '\0';
  80 }
  81 
  82 /*
  83  * Given a full path to a file, translate into a dataset name and a relative
  84  * path within the dataset.  'dataset' must be at least MAXNAMELEN characters,
  85  * and 'relpath' must be at least MAXPATHLEN characters.  We also pass a stat64
  86  * buffer, which we need later to get the object ID.
  87  */
  88 static int
  89 parse_pathname(const char *inpath, char *dataset, char *relpath,
  90     struct stat64 *statbuf)
  91 {
  92         struct extmnttab mp;
  93         FILE *fp;
  94         int match;
  95         const char *rel;
  96         char fullpath[MAXPATHLEN];
  97 
  98         compress_slashes(inpath, fullpath);
  99 
 100         if (fullpath[0] != '/') {
 101                 (void) fprintf(stderr, "invalid object '%s': must be full "
 102                     "path\n", fullpath);
 103                 usage();
 104                 return (-1);
 105         }
 106 
 107         if (strlen(fullpath) >= MAXPATHLEN) {
 108                 (void) fprintf(stderr, "invalid object; pathname too long\n");
 109                 return (-1);
 110         }
 111 
 112         if (stat64(fullpath, statbuf) != 0) {
 113                 (void) fprintf(stderr, "cannot open '%s': %s\n",
 114                     fullpath, strerror(errno));
 115                 return (-1);
 116         }
 117 
 118         if ((fp = fopen(MNTTAB, "r")) == NULL) {
 119                 (void) fprintf(stderr, "cannot open /etc/mnttab\n");
 120                 return (-1);
 121         }
 122 
 123         match = 0;
 124         while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
 125                 if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
 126                         match = 1;
 127                         break;
 128                 }
 129         }
 130 
 131         if (!match) {
 132                 (void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
 133                     fullpath);
 134                 return (-1);
 135         }
 136 
 137         if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) {
 138                 (void) fprintf(stderr, "invalid path '%s': not a ZFS "
 139                     "filesystem\n", fullpath);
 140                 return (-1);
 141         }
 142 
 143         if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) {
 144                 (void) fprintf(stderr, "invalid path '%s': mountpoint "
 145                     "doesn't match path\n", fullpath);
 146                 return (-1);
 147         }
 148 
 149         (void) strcpy(dataset, mp.mnt_special);
 150 
 151         rel = fullpath + strlen(mp.mnt_mountp);
 152         if (rel[0] == '/')
 153                 rel++;
 154         (void) strcpy(relpath, rel);
 155 
 156         return (0);
 157 }
 158 
 159 /*
 160  * Convert from a (dataset, path) pair into a (objset, object) pair.  Note that
 161  * we grab the object number from the inode number, since looking this up via
 162  * libzpool is a real pain.
 163  */
 164 /* ARGSUSED */
 165 static int
 166 object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
 167     zinject_record_t *record)
 168 {
 169         objset_t *os;
 170         int err;
 171 
 172         /*
 173          * Before doing any libzpool operations, call sync() to ensure that the
 174          * on-disk state is consistent with the in-core state.
 175          */
 176         sync();
 177 
 178         err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
 179         if (err != 0) {
 180                 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 181                     dataset, strerror(err));
 182                 return (-1);
 183         }
 184 
 185         record->zi_objset = dmu_objset_id(os);
 186         record->zi_object = statbuf->st_ino;
 187 
 188         dmu_objset_disown(os, FTAG);
 189 
 190         return (0);
 191 }
 192 
 193 /*
 194  * Calculate the real range based on the type, level, and range given.
 195  */
 196 static int
 197 calculate_range(const char *dataset, err_type_t type, int level, char *range,
 198     zinject_record_t *record)
 199 {
 200         objset_t *os = NULL;
 201         dnode_t *dn = NULL;
 202         int err;
 203         int ret = -1;
 204 
 205         /*
 206          * Determine the numeric range from the string.
 207          */
 208         if (range == NULL) {
 209                 /*
 210                  * If range is unspecified, set the range to [0,-1], which
 211                  * indicates that the whole object should be treated as an
 212                  * error.
 213                  */
 214                 record->zi_start = 0;
 215                 record->zi_end = -1ULL;
 216         } else {
 217                 char *end;
 218 
 219                 /* XXX add support for suffixes */
 220                 record->zi_start = strtoull(range, &end, 10);
 221 
 222 
 223                 if (*end == '\0')
 224                         record->zi_end = record->zi_start + 1;
 225                 else if (*end == ',')
 226                         record->zi_end = strtoull(end + 1, &end, 10);
 227 
 228                 if (*end != '\0') {
 229                         (void) fprintf(stderr, "invalid range '%s': must be "
 230                             "a numeric range of the form 'start[,end]'\n",
 231                             range);
 232                         goto out;
 233                 }
 234         }
 235 
 236         switch (type) {
 237         case TYPE_DATA:
 238                 break;
 239 
 240         case TYPE_DNODE:
 241                 /*
 242                  * If this is a request to inject faults into the dnode, then we
 243                  * must translate the current (objset,object) pair into an
 244                  * offset within the metadnode for the objset.  Specifying any
 245                  * kind of range with type 'dnode' is illegal.
 246                  */
 247                 if (range != NULL) {
 248                         (void) fprintf(stderr, "range cannot be specified when "
 249                             "type is 'dnode'\n");
 250                         goto out;
 251                 }
 252 
 253                 record->zi_start = record->zi_object * sizeof (dnode_phys_t);
 254                 record->zi_end = record->zi_start + sizeof (dnode_phys_t);
 255                 record->zi_object = 0;
 256                 break;
 257         }
 258 
 259         /*
 260          * Get the dnode associated with object, so we can calculate the block
 261          * size.
 262          */
 263         if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
 264             B_TRUE, FTAG, &os)) != 0) {
 265                 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 266                     dataset, strerror(err));
 267                 goto out;
 268         }
 269 
 270         if (record->zi_object == 0) {
 271                 dn = DMU_META_DNODE(os);
 272         } else {
 273                 err = dnode_hold(os, record->zi_object, FTAG, &dn);
 274                 if (err != 0) {
 275                         (void) fprintf(stderr, "failed to hold dnode "
 276                             "for object %llu\n",
 277                             (u_longlong_t)record->zi_object);
 278                         goto out;
 279                 }
 280         }
 281 
 282 
 283         ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
 284         ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
 285 
 286         /*
 287          * Translate range into block IDs.
 288          */
 289         if (record->zi_start != 0 || record->zi_end != -1ULL) {
 290                 record->zi_start >>= dn->dn_datablkshift;
 291                 record->zi_end >>= dn->dn_datablkshift;
 292         }
 293 
 294         /*
 295          * Check level, and then translate level 0 blkids into ranges
 296          * appropriate for level of indirection.
 297          */
 298         record->zi_level = level;
 299         if (level > 0) {
 300                 ziprintf("level 0 blkid range: [%llu, %llu]\n",
 301                     record->zi_start, record->zi_end);
 302 
 303                 if (level >= dn->dn_nlevels) {
 304                         (void) fprintf(stderr, "level %d exceeds max level "
 305                             "of object (%d)\n", level, dn->dn_nlevels - 1);
 306                         goto out;
 307                 }
 308 
 309                 if (record->zi_start != 0 || record->zi_end != 0) {
 310                         int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 311 
 312                         for (; level > 0; level--) {
 313                                 record->zi_start >>= shift;
 314                                 record->zi_end >>= shift;
 315                         }
 316                 }
 317         }
 318 
 319         ret = 0;
 320 out:
 321         if (dn) {
 322                 if (dn != DMU_META_DNODE(os))
 323                         dnode_rele(dn, FTAG);
 324         }
 325         if (os)
 326                 dmu_objset_disown(os, FTAG);
 327 
 328         return (ret);
 329 }
 330 
 331 int
 332 translate_record(err_type_t type, const char *object, const char *range,
 333     int level, zinject_record_t *record, char *poolname, char *dataset)
 334 {
 335         char path[MAXPATHLEN];
 336         char *slash;
 337         struct stat64 statbuf;
 338         int ret = -1;
 339 
 340         kernel_init(FREAD);
 341 
 342         debug = (getenv("ZINJECT_DEBUG") != NULL);
 343 
 344         ziprintf("translating: %s\n", object);
 345 
 346         if (MOS_TYPE(type)) {
 347                 /*
 348                  * MOS objects are treated specially.
 349                  */
 350                 switch (type) {
 351                 case TYPE_MOS:
 352                         record->zi_type = 0;
 353                         break;
 354                 case TYPE_MOSDIR:
 355                         record->zi_type = DMU_OT_OBJECT_DIRECTORY;
 356                         break;
 357                 case TYPE_METASLAB:
 358                         record->zi_type = DMU_OT_OBJECT_ARRAY;
 359                         break;
 360                 case TYPE_CONFIG:
 361                         record->zi_type = DMU_OT_PACKED_NVLIST;
 362                         break;
 363                 case TYPE_BPOBJ:
 364                         record->zi_type = DMU_OT_BPOBJ;
 365                         break;
 366                 case TYPE_SPACEMAP:
 367                         record->zi_type = DMU_OT_SPACE_MAP;
 368                         break;
 369                 case TYPE_ERRLOG:
 370                         record->zi_type = DMU_OT_ERROR_LOG;
 371                         break;
 372                 }
 373 
 374                 dataset[0] = '\0';
 375                 (void) strcpy(poolname, object);
 376                 return (0);
 377         }
 378 
 379         /*
 380          * Convert a full path into a (dataset, file) pair.
 381          */
 382         if (parse_pathname(object, dataset, path, &statbuf) != 0)
 383                 goto err;
 384 
 385         ziprintf("   dataset: %s\n", dataset);
 386         ziprintf("      path: %s\n", path);
 387 
 388         /*
 389          * Convert (dataset, file) into (objset, object)
 390          */
 391         if (object_from_path(dataset, path, &statbuf, record) != 0)
 392                 goto err;
 393 
 394         ziprintf("raw objset: %llu\n", record->zi_objset);
 395         ziprintf("raw object: %llu\n", record->zi_object);
 396 
 397         /*
 398          * For the given object, calculate the real (type, level, range)
 399          */
 400         if (calculate_range(dataset, type, level, (char *)range, record) != 0)
 401                 goto err;
 402 
 403         ziprintf("    objset: %llu\n", record->zi_objset);
 404         ziprintf("    object: %llu\n", record->zi_object);
 405         if (record->zi_start == 0 &&
 406             record->zi_end == -1ULL)
 407                 ziprintf("     range: all\n");
 408         else
 409                 ziprintf("     range: [%llu, %llu]\n", record->zi_start,
 410                     record->zi_end);
 411 
 412         /*
 413          * Copy the pool name
 414          */
 415         (void) strcpy(poolname, dataset);
 416         if ((slash = strchr(poolname, '/')) != NULL)
 417                 *slash = '\0';
 418 
 419         ret = 0;
 420 
 421 err:
 422         kernel_fini();
 423         return (ret);
 424 }
 425 
 426 int
 427 translate_raw(const char *str, zinject_record_t *record)
 428 {
 429         /*
 430          * A raw bookmark of the form objset:object:level:blkid, where each
 431          * number is a hexidecimal value.
 432          */
 433         if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
 434             (u_longlong_t *)&record->zi_object, &record->zi_level,
 435             (u_longlong_t *)&record->zi_start) != 4) {
 436                 (void) fprintf(stderr, "bad raw spec '%s': must be of the form "
 437                     "'objset:object:level:blkid'\n", str);
 438                 return (-1);
 439         }
 440 
 441         record->zi_end = record->zi_start;
 442 
 443         return (0);
 444 }
 445 
 446 int
 447 translate_device(const char *pool, const char *device, err_type_t label_type,
 448     zinject_record_t *record)
 449 {
 450         char *end;
 451         zpool_handle_t *zhp;
 452         nvlist_t *tgt;
 453         boolean_t isspare, iscache;
 454 
 455         /*
 456          * Given a device name or GUID, create an appropriate injection record
 457          * with zi_guid set.
 458          */
 459         if ((zhp = zpool_open(g_zfs, pool)) == NULL)
 460                 return (-1);
 461 
 462         record->zi_guid = strtoull(device, &end, 16);
 463         if (record->zi_guid == 0 || *end != '\0') {
 464                 tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL);
 465 
 466                 if (tgt == NULL) {
 467                         (void) fprintf(stderr, "cannot find device '%s' in "
 468                             "pool '%s'\n", device, pool);
 469                         return (-1);
 470                 }
 471 
 472                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
 473                     &record->zi_guid) == 0);
 474         }
 475 
 476         /*
 477          * Device faults can take on three different forms:
 478          * 1). delayed or hanging I/O
 479          * 2). zfs label faults
 480          * 3). generic disk faults
 481          */
 482         if (record->zi_timer != 0) {
 483                 record->zi_cmd = ZINJECT_DELAY_IO;
 484         } else if (label_type != TYPE_INVAL) {
 485                 record->zi_cmd = ZINJECT_LABEL_FAULT;
 486         } else {
 487                 record->zi_cmd = ZINJECT_DEVICE_FAULT;
 488         }
 489 
 490         switch (label_type) {
 491         case TYPE_LABEL_UBERBLOCK:
 492                 record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
 493                 record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
 494                 break;
 495         case TYPE_LABEL_NVLIST:
 496                 record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
 497                 record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
 498                 break;
 499         case TYPE_LABEL_PAD1:
 500                 record->zi_start = offsetof(vdev_label_t, vl_pad1);
 501                 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 502                 break;
 503         case TYPE_LABEL_PAD2:
 504                 record->zi_start = offsetof(vdev_label_t, vl_pad2);
 505                 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 506                 break;
 507         }
 508         return (0);
 509 }