1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 #include <libzfs.h>
28
29 #include <sys/zfs_context.h>
30
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <stdarg.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <sys/file.h>
39 #include <sys/mntent.h>
40 #include <sys/mnttab.h>
41 #include <sys/param.h>
42 #include <sys/stat.h>
43
44 #include <sys/dmu.h>
45 #include <sys/dmu_objset.h>
46 #include <sys/dnode.h>
47 #include <sys/vdev_impl.h>
48
49 #include <sys/mkdev.h>
50
51 #include "zinject.h"
52
/*
 * Defined outside this file.  translate_record() calls these around its
 * dataset lookups.
 */
extern void kernel_init(int mode);
extern void kernel_fini(void);

/*
 * Non-zero enables ziprintf() output.  translate_record() sets it according
 * to whether ZINJECT_DEBUG is present in the environment.
 */
static int debug = 0;
57
58 static void
59 ziprintf(const char *fmt, ...)
60 {
61 va_list ap;
62
63 if (!debug)
64 return;
65
66 va_start(ap, fmt);
67 (void) vprintf(fmt, ap);
68 va_end(ap);
69 }
70
/*
 * Copy 'src' to 'dest', collapsing every run of consecutive '/' characters
 * into a single '/'.  The output is always NUL-terminated and is never longer
 * than the input, so 'dest' needs room for strlen(src) + 1 bytes.
 */
static void
compress_slashes(const char *src, char *dest)
{
	const char *in = src;
	char *out = dest;
	char c;

	while ((c = *in++) != '\0') {
		*out++ = c;

		/* Having emitted one slash, swallow any that follow it. */
		if (c == '/') {
			while (*in == '/')
				in++;
		}
	}

	*out = '\0';
}
82
83 /*
84 * Given a full path to a file, translate into a dataset name and a relative
85 * path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
86 * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64
87 * buffer, which we need later to get the object ID.
88 */
89 static int
90 parse_pathname(const char *inpath, char *dataset, char *relpath,
91 struct stat64 *statbuf)
92 {
93 struct extmnttab mp;
94 FILE *fp;
95 int match;
96 const char *rel;
97 char fullpath[MAXPATHLEN];
98
99 compress_slashes(inpath, fullpath);
100
101 if (fullpath[0] != '/') {
102 (void) fprintf(stderr, "invalid object '%s': must be full "
103 "path\n", fullpath);
104 usage();
105 return (-1);
106 }
107
108 if (strlen(fullpath) >= MAXPATHLEN) {
109 (void) fprintf(stderr, "invalid object; pathname too long\n");
110 return (-1);
111 }
112
113 if (stat64(fullpath, statbuf) != 0) {
114 (void) fprintf(stderr, "cannot open '%s': %s\n",
115 fullpath, strerror(errno));
116 return (-1);
117 }
118
119 if ((fp = fopen(MNTTAB, "r")) == NULL) {
120 (void) fprintf(stderr, "cannot open /etc/mnttab\n");
121 return (-1);
122 }
123
124 match = 0;
125 while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
126 if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
127 match = 1;
128 break;
129 }
130 }
131
132 if (!match) {
133 (void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
134 fullpath);
135 return (-1);
136 }
137
138 if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) {
139 (void) fprintf(stderr, "invalid path '%s': not a ZFS "
140 "filesystem\n", fullpath);
141 return (-1);
142 }
143
144 if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) {
145 (void) fprintf(stderr, "invalid path '%s': mountpoint "
146 "doesn't match path\n", fullpath);
147 return (-1);
148 }
149
150 (void) strcpy(dataset, mp.mnt_special);
151
152 rel = fullpath + strlen(mp.mnt_mountp);
153 if (rel[0] == '/')
154 rel++;
155 (void) strcpy(relpath, rel);
156
157 return (0);
158 }
159
160 /*
161 * Convert from a (dataset, path) pair into a (objset, object) pair. Note that
162 * we grab the object number from the inode number, since looking this up via
163 * libzpool is a real pain.
164 */
165 /* ARGSUSED */
166 static int
167 object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
168 zinject_record_t *record)
169 {
170 objset_t *os;
171 int err;
172
173 /*
174 * Before doing any libzpool operations, call sync() to ensure that the
175 * on-disk state is consistent with the in-core state.
176 */
177 sync();
178
179 err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
180 if (err != 0) {
181 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
182 dataset, strerror(err));
183 return (-1);
184 }
185
186 record->zi_objset = dmu_objset_id(os);
187 record->zi_object = statbuf->st_ino;
188
189 dmu_objset_disown(os, FTAG);
190
191 return (0);
192 }
193
194 /*
195 * Calculate the real range based on the type, level, and range given.
196 */
197 static int
198 calculate_range(const char *dataset, err_type_t type, int level, char *range,
199 zinject_record_t *record)
200 {
201 objset_t *os = NULL;
202 dnode_t *dn = NULL;
203 int err;
204 int ret = -1;
205
206 /*
207 * Determine the numeric range from the string.
208 */
209 if (range == NULL) {
210 /*
211 * If range is unspecified, set the range to [0,-1], which
212 * indicates that the whole object should be treated as an
213 * error.
214 */
215 record->zi_start = 0;
216 record->zi_end = -1ULL;
217 } else {
218 char *end;
219
220 /* XXX add support for suffixes */
221 record->zi_start = strtoull(range, &end, 10);
222
223
224 if (*end == '\0')
225 record->zi_end = record->zi_start + 1;
226 else if (*end == ',')
227 record->zi_end = strtoull(end + 1, &end, 10);
228
229 if (*end != '\0') {
230 (void) fprintf(stderr, "invalid range '%s': must be "
231 "a numeric range of the form 'start[,end]'\n",
232 range);
233 goto out;
234 }
235 }
236
237 switch (type) {
238 case TYPE_DATA:
239 break;
240
241 case TYPE_DNODE:
242 /*
243 * If this is a request to inject faults into the dnode, then we
244 * must translate the current (objset,object) pair into an
245 * offset within the metadnode for the objset. Specifying any
246 * kind of range with type 'dnode' is illegal.
247 */
248 if (range != NULL) {
249 (void) fprintf(stderr, "range cannot be specified when "
250 "type is 'dnode'\n");
251 goto out;
252 }
253
254 record->zi_start = record->zi_object * sizeof (dnode_phys_t);
255 record->zi_end = record->zi_start + sizeof (dnode_phys_t);
256 record->zi_object = 0;
257 break;
258 }
259
260 /*
261 * Get the dnode associated with object, so we can calculate the block
262 * size.
263 */
264 if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
265 B_TRUE, FTAG, &os)) != 0) {
266 (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
267 dataset, strerror(err));
268 goto out;
269 }
270
271 if (record->zi_object == 0) {
272 dn = DMU_META_DNODE(os);
273 } else {
274 err = dnode_hold(os, record->zi_object, FTAG, &dn);
275 if (err != 0) {
276 (void) fprintf(stderr, "failed to hold dnode "
277 "for object %llu\n",
278 (u_longlong_t)record->zi_object);
279 goto out;
280 }
281 }
282
283
284 ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
285 ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
286
287 /*
288 * Translate range into block IDs.
289 */
290 if (record->zi_start != 0 || record->zi_end != -1ULL) {
291 record->zi_start >>= dn->dn_datablkshift;
292 record->zi_end >>= dn->dn_datablkshift;
293 }
294
295 /*
296 * Check level, and then translate level 0 blkids into ranges
297 * appropriate for level of indirection.
298 */
299 record->zi_level = level;
300 if (level > 0) {
301 ziprintf("level 0 blkid range: [%llu, %llu]\n",
302 record->zi_start, record->zi_end);
303
304 if (level >= dn->dn_nlevels) {
305 (void) fprintf(stderr, "level %d exceeds max level "
306 "of object (%d)\n", level, dn->dn_nlevels - 1);
307 goto out;
308 }
309
310 if (record->zi_start != 0 || record->zi_end != 0) {
311 int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
312
313 for (; level > 0; level--) {
314 record->zi_start >>= shift;
315 record->zi_end >>= shift;
316 }
317 }
318 }
319
320 ret = 0;
321 out:
322 if (dn) {
323 if (dn != DMU_META_DNODE(os))
324 dnode_rele(dn, FTAG);
325 }
326 if (os)
327 dmu_objset_disown(os, FTAG);
328
329 return (ret);
330 }
331
332 int
333 translate_record(err_type_t type, const char *object, const char *range,
334 int level, zinject_record_t *record, char *poolname, char *dataset)
335 {
336 char path[MAXPATHLEN];
337 char *slash;
338 struct stat64 statbuf;
339 int ret = -1;
340
341 kernel_init(FREAD);
342
343 debug = (getenv("ZINJECT_DEBUG") != NULL);
344
345 ziprintf("translating: %s\n", object);
346
347 if (MOS_TYPE(type)) {
348 /*
349 * MOS objects are treated specially.
350 */
351 switch (type) {
352 case TYPE_MOS:
353 record->zi_type = 0;
354 break;
355 case TYPE_MOSDIR:
356 record->zi_type = DMU_OT_OBJECT_DIRECTORY;
357 break;
358 case TYPE_METASLAB:
359 record->zi_type = DMU_OT_OBJECT_ARRAY;
360 break;
361 case TYPE_CONFIG:
362 record->zi_type = DMU_OT_PACKED_NVLIST;
363 break;
364 case TYPE_BPOBJ:
365 record->zi_type = DMU_OT_BPOBJ;
366 break;
367 case TYPE_SPACEMAP:
368 record->zi_type = DMU_OT_SPACE_MAP;
369 break;
370 case TYPE_ERRLOG:
371 record->zi_type = DMU_OT_ERROR_LOG;
372 break;
373 }
374
375 dataset[0] = '\0';
376 (void) strcpy(poolname, object);
377 return (0);
378 }
379
380 /*
381 * Convert a full path into a (dataset, file) pair.
382 */
383 if (parse_pathname(object, dataset, path, &statbuf) != 0)
384 goto err;
385
386 ziprintf(" dataset: %s\n", dataset);
387 ziprintf(" path: %s\n", path);
388
389 /*
390 * Convert (dataset, file) into (objset, object)
391 */
392 if (object_from_path(dataset, path, &statbuf, record) != 0)
393 goto err;
394
395 ziprintf("raw objset: %llu\n", record->zi_objset);
396 ziprintf("raw object: %llu\n", record->zi_object);
397
398 /*
399 * For the given object, calculate the real (type, level, range)
400 */
401 if (calculate_range(dataset, type, level, (char *)range, record) != 0)
402 goto err;
403
404 ziprintf(" objset: %llu\n", record->zi_objset);
405 ziprintf(" object: %llu\n", record->zi_object);
406 if (record->zi_start == 0 &&
407 record->zi_end == -1ULL)
408 ziprintf(" range: all\n");
409 else
410 ziprintf(" range: [%llu, %llu]\n", record->zi_start,
411 record->zi_end);
412
413 /*
414 * Copy the pool name
415 */
416 (void) strcpy(poolname, dataset);
417 if ((slash = strchr(poolname, '/')) != NULL)
418 *slash = '\0';
419
420 ret = 0;
421
422 err:
423 kernel_fini();
424 return (ret);
425 }
426
427 int
428 translate_raw(const char *str, zinject_record_t *record)
429 {
430 /*
431 * A raw bookmark of the form objset:object:level:blkid, where each
432 * number is a hexidecimal value.
433 */
434 if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
435 (u_longlong_t *)&record->zi_object, &record->zi_level,
436 (u_longlong_t *)&record->zi_start) != 4) {
437 (void) fprintf(stderr, "bad raw spec '%s': must be of the form "
438 "'objset:object:level:blkid'\n", str);
439 return (-1);
440 }
441
442 record->zi_end = record->zi_start;
443
444 return (0);
445 }
446
447 int
448 translate_device(const char *pool, const char *device, err_type_t label_type,
449 zinject_record_t *record)
450 {
451 char *end;
452 zpool_handle_t *zhp;
453 nvlist_t *tgt;
454 boolean_t isspare, iscache;
455
456 /*
457 * Given a device name or GUID, create an appropriate injection record
458 * with zi_guid set.
459 */
460 if ((zhp = zpool_open(g_zfs, pool)) == NULL)
461 return (-1);
462
463 record->zi_guid = strtoull(device, &end, 16);
464 if (record->zi_guid == 0 || *end != '\0') {
465 tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL,
466 NULL);
467
468 if (tgt == NULL) {
469 (void) fprintf(stderr, "cannot find device '%s' in "
470 "pool '%s'\n", device, pool);
471 return (-1);
472 }
473
474 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
475 &record->zi_guid) == 0);
476 }
477
478 /*
479 * Device faults can take on three different forms:
480 * 1). delayed or hanging I/O
481 * 2). zfs label faults
482 * 3). generic disk faults
483 */
484 if (record->zi_timer != 0) {
485 record->zi_cmd = ZINJECT_DELAY_IO;
486 } else if (label_type != TYPE_INVAL) {
487 record->zi_cmd = ZINJECT_LABEL_FAULT;
488 } else {
489 record->zi_cmd = ZINJECT_DEVICE_FAULT;
490 }
491
492 switch (label_type) {
493 case TYPE_LABEL_UBERBLOCK:
494 record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
495 record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
496 break;
497 case TYPE_LABEL_NVLIST:
498 record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
499 record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
500 break;
501 case TYPE_LABEL_PAD1:
502 record->zi_start = offsetof(vdev_label_t, vl_pad1);
503 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
504 break;
505 case TYPE_LABEL_PAD2:
506 record->zi_start = offsetof(vdev_label_t, vl_pad2);
507 record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
508 break;
509 }
510 return (0);
511 }