Print this page
NEX-13937 Improve kstat performance
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
6328 Fix cstyle errors in zfs codebase (fix studio)
6328 Fix cstyle errors in zfs codebase
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Jorgen Lundman <lundman@lundman.net>
Approved by: Robert Mustacchi <rm@joyent.com>
6209 libc mutexes break kernel writers hearts
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Dan McDonald <danmcd@omniti.com>
5815 libzpool's panic function doesn't set global panicstr, ::status not as useful
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Rich Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@omniti.com>
re #12393 rb3935 Kerberos and smbd disagree about who is our AD server (fix elf runtime attributes check)
re #11612 rb3907 Failing vdev of a mirrored pool should not take zfs operations out of action for extended periods of time.
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/lib/libzpool/common/kernel.c
+++ new/usr/src/lib/libzpool/common/kernel.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 25 * Copyright 2017 RackTop Systems.
26 26 */
27 27
28 28 #include <assert.h>
29 29 #include <fcntl.h>
30 30 #include <poll.h>
31 31 #include <stdio.h>
32 32 #include <stdlib.h>
33 33 #include <string.h>
34 34 #include <zlib.h>
35 35 #include <libgen.h>
36 36 #include <sys/spa.h>
37 37 #include <sys/stat.h>
38 38 #include <sys/processor.h>
39 39 #include <sys/zfs_context.h>
40 40 #include <sys/rrwlock.h>
41 41 #include <sys/zmod.h>
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
42 42 #include <sys/utsname.h>
43 43 #include <sys/systeminfo.h>
44 44
45 45 extern void system_taskq_init(void);
46 46 extern void system_taskq_fini(void);
47 47
48 48 /*
49 49 * Emulation of kernel services in userland.
50 50 */
51 51
52 -pgcnt_t physmem;
52 +volatile pgcnt_t physmem;
53 53 vnode_t *rootdir = (vnode_t *)0xabcd1234;
54 54 char hw_serial[HW_HOSTID_LEN];
55 55 kmutex_t cpu_lock;
56 56 vmem_t *zio_arena = NULL;
57 57
58 58 /* If set, all blocks read will be copied to the specified directory. */
59 59 char *vn_dumpdir = NULL;
60 60
61 61 struct utsname utsname = {
62 62 "userland", "libzpool", "1", "1", "na"
63 63 };
64 64
65 65 /*
66 66 * =========================================================================
67 67 * vnode operations
68 68 * =========================================================================
69 69 */
70 70 /*
71 71 * Note: for the xxxat() versions of these functions, we assume that the
72 72 * starting vp is always rootdir (which is true for spa_directory.c, the only
73 73 * ZFS consumer of these interfaces). We assert this is true, and then emulate
74 74 * them by adding '/' in front of the path.
75 75 */
76 76
77 77 /*ARGSUSED*/
78 78 int
79 79 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
80 80 {
81 81 int fd;
82 82 int dump_fd;
83 83 vnode_t *vp;
84 84 int old_umask;
85 85 char realpath[MAXPATHLEN];
86 86 struct stat64 st;
87 87
88 88 /*
89 89 * If we're accessing a real disk from userland, we need to use
90 90 * the character interface to avoid caching. This is particularly
91 91 * important if we're trying to look at a real in-kernel storage
92 92 * pool from userland, e.g. via zdb, because otherwise we won't
93 93 * see the changes occurring under the segmap cache.
94 94 * On the other hand, the stupid character device returns zero
95 95 * for its size. So -- gag -- we open the block device to get
96 96 * its size, and remember it for subsequent VOP_GETATTR().
97 97 */
98 98 if (strncmp(path, "/dev/", 5) == 0) {
99 99 char *dsk;
100 100 fd = open64(path, O_RDONLY);
101 101 if (fd == -1)
102 102 return (errno);
103 103 if (fstat64(fd, &st) == -1) {
104 104 close(fd);
105 105 return (errno);
106 106 }
107 107 close(fd);
108 108 (void) sprintf(realpath, "%s", path);
109 109 dsk = strstr(path, "/dsk/");
110 110 if (dsk != NULL)
111 111 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
112 112 dsk + 1);
113 113 } else {
114 114 (void) sprintf(realpath, "%s", path);
115 115 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
116 116 return (errno);
117 117 }
118 118
119 119 if (flags & FCREAT)
120 120 old_umask = umask(0);
121 121
122 122 /*
123 123 * The construct 'flags - FREAD' conveniently maps combinations of
124 124 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
125 125 */
126 126 fd = open64(realpath, flags - FREAD, mode);
127 127
128 128 if (flags & FCREAT)
129 129 (void) umask(old_umask);
130 130
131 131 if (vn_dumpdir != NULL) {
132 132 char dumppath[MAXPATHLEN];
133 133 (void) snprintf(dumppath, sizeof (dumppath),
134 134 "%s/%s", vn_dumpdir, basename(realpath));
135 135 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
136 136 if (dump_fd == -1)
137 137 return (errno);
138 138 } else {
139 139 dump_fd = -1;
140 140 }
141 141
142 142 if (fd == -1)
143 143 return (errno);
144 144
145 145 if (fstat64(fd, &st) == -1) {
146 146 close(fd);
147 147 return (errno);
148 148 }
149 149
150 150 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
151 151
152 152 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
153 153
154 154 vp->v_fd = fd;
155 155 vp->v_size = st.st_size;
156 156 vp->v_path = spa_strdup(path);
157 157 vp->v_dump_fd = dump_fd;
158 158
159 159 return (0);
160 160 }
161 161
162 162 /*ARGSUSED*/
163 163 int
164 164 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
165 165 int x3, vnode_t *startvp, int fd)
166 166 {
167 167 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
168 168 int ret;
169 169
170 170 ASSERT(startvp == rootdir);
171 171 (void) sprintf(realpath, "/%s", path);
172 172
173 173 /* fd ignored for now, need if want to simulate nbmand support */
174 174 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
175 175
176 176 umem_free(realpath, strlen(path) + 2);
177 177
178 178 return (ret);
179 179 }
180 180
181 181 /*ARGSUSED*/
182 182 int
183 183 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
184 184 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
185 185 {
186 186 ssize_t iolen, split;
187 187
188 188 if (uio == UIO_READ) {
189 189 iolen = pread64(vp->v_fd, addr, len, offset);
190 190 if (vp->v_dump_fd != -1) {
191 191 int status =
192 192 pwrite64(vp->v_dump_fd, addr, iolen, offset);
193 193 ASSERT(status != -1);
194 194 }
195 195 } else {
196 196 /*
197 197 * To simulate partial disk writes, we split writes into two
198 198 * system calls so that the process can be killed in between.
199 199 */
200 200 int sectors = len >> SPA_MINBLOCKSHIFT;
201 201 split = (sectors > 0 ? rand() % sectors : 0) <<
202 202 SPA_MINBLOCKSHIFT;
203 203 iolen = pwrite64(vp->v_fd, addr, split, offset);
204 204 iolen += pwrite64(vp->v_fd, (char *)addr + split,
205 205 len - split, offset + split);
206 206 }
207 207
208 208 if (iolen == -1)
209 209 return (errno);
210 210 if (residp)
211 211 *residp = len - iolen;
212 212 else if (iolen != len)
213 213 return (EIO);
214 214 return (0);
215 215 }
216 216
217 217 void
218 218 vn_close(vnode_t *vp)
219 219 {
220 220 close(vp->v_fd);
221 221 if (vp->v_dump_fd != -1)
222 222 close(vp->v_dump_fd);
223 223 spa_strfree(vp->v_path);
224 224 umem_free(vp, sizeof (vnode_t));
225 225 }
226 226
227 227 /*
228 228 * At a minimum we need to update the size since vdev_reopen()
229 229 * will no longer call vn_openat().
230 230 */
231 231 int
232 232 fop_getattr(vnode_t *vp, vattr_t *vap)
233 233 {
234 234 struct stat64 st;
235 235
236 236 if (fstat64(vp->v_fd, &st) == -1) {
237 237 close(vp->v_fd);
238 238 return (errno);
239 239 }
240 240
241 241 vap->va_size = st.st_size;
242 242 return (0);
243 243 }
244 244
245 245 #ifdef ZFS_DEBUG
246 246
247 247 /*
248 248 * =========================================================================
249 249 * Figure out which debugging statements to print
250 250 * =========================================================================
251 251 */
252 252
253 253 static char *dprintf_string;
254 254 static int dprintf_print_all;
255 255
256 256 int
257 257 dprintf_find_string(const char *string)
258 258 {
259 259 char *tmp_str = dprintf_string;
260 260 int len = strlen(string);
261 261
262 262 /*
263 263 * Find out if this is a string we want to print.
264 264 * String format: file1.c,function_name1,file2.c,file3.c
265 265 */
266 266
267 267 while (tmp_str != NULL) {
268 268 if (strncmp(tmp_str, string, len) == 0 &&
269 269 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
270 270 return (1);
271 271 tmp_str = strchr(tmp_str, ',');
272 272 if (tmp_str != NULL)
273 273 tmp_str++; /* Get rid of , */
274 274 }
275 275 return (0);
276 276 }
277 277
278 278 void
279 279 dprintf_setup(int *argc, char **argv)
280 280 {
281 281 int i, j;
282 282
283 283 /*
284 284 * Debugging can be specified two ways: by setting the
285 285 * environment variable ZFS_DEBUG, or by including a
286 286 * "debug=..." argument on the command line. The command
287 287 * line setting overrides the environment variable.
288 288 */
289 289
290 290 for (i = 1; i < *argc; i++) {
291 291 int len = strlen("debug=");
292 292 /* First look for a command line argument */
293 293 if (strncmp("debug=", argv[i], len) == 0) {
294 294 dprintf_string = argv[i] + len;
295 295 /* Remove from args */
296 296 for (j = i; j < *argc; j++)
297 297 argv[j] = argv[j+1];
298 298 argv[j] = NULL;
299 299 (*argc)--;
300 300 }
301 301 }
302 302
303 303 if (dprintf_string == NULL) {
304 304 /* Look for ZFS_DEBUG environment variable */
305 305 dprintf_string = getenv("ZFS_DEBUG");
306 306 }
307 307
308 308 /*
309 309 * Are we just turning on all debugging?
310 310 */
311 311 if (dprintf_find_string("on"))
312 312 dprintf_print_all = 1;
313 313
314 314 if (dprintf_string != NULL)
315 315 zfs_flags |= ZFS_DEBUG_DPRINTF;
316 316 }
317 317
318 318 /*
319 319 * =========================================================================
320 320 * debug printfs
321 321 * =========================================================================
322 322 */
323 323 void
324 324 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
325 325 {
326 326 const char *newfile;
327 327 va_list adx;
328 328
329 329 /*
330 330 * Get rid of annoying "../common/" prefix to filename.
331 331 */
332 332 newfile = strrchr(file, '/');
333 333 if (newfile != NULL) {
334 334 newfile = newfile + 1; /* Get rid of leading / */
335 335 } else {
336 336 newfile = file;
337 337 }
338 338
339 339 if (dprintf_print_all ||
340 340 dprintf_find_string(newfile) ||
341 341 dprintf_find_string(func)) {
342 342 /* Print out just the function name if requested */
343 343 flockfile(stdout);
344 344 if (dprintf_find_string("pid"))
345 345 (void) printf("%d ", getpid());
346 346 if (dprintf_find_string("tid"))
347 347 (void) printf("%u ", thr_self());
348 348 if (dprintf_find_string("cpu"))
349 349 (void) printf("%u ", getcpuid());
350 350 if (dprintf_find_string("time"))
351 351 (void) printf("%llu ", gethrtime());
352 352 if (dprintf_find_string("long"))
353 353 (void) printf("%s, line %d: ", newfile, line);
354 354 (void) printf("%s: ", func);
355 355 va_start(adx, fmt);
356 356 (void) vprintf(fmt, adx);
357 357 va_end(adx);
358 358 funlockfile(stdout);
359 359 }
360 360 }
361 361
362 362 #endif /* ZFS_DEBUG */
363 363
364 364 /*
365 365 * =========================================================================
366 366 * kobj interfaces
367 367 * =========================================================================
368 368 */
369 369 struct _buf *
370 370 kobj_open_file(char *name)
371 371 {
372 372 struct _buf *file;
373 373 vnode_t *vp;
374 374
375 375 /* set vp as the _fd field of the file */
376 376 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
377 377 -1) != 0)
378 378 return ((void *)-1UL);
379 379
380 380 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
381 381 file->_fd = (intptr_t)vp;
382 382 return (file);
383 383 }
384 384
385 385 int
386 386 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
387 387 {
388 388 ssize_t resid;
389 389
390 390 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
391 391 UIO_SYSSPACE, 0, 0, 0, &resid);
392 392
393 393 return (size - resid);
394 394 }
395 395
396 396 void
397 397 kobj_close_file(struct _buf *file)
398 398 {
399 399 vn_close((vnode_t *)file->_fd);
400 400 umem_free(file, sizeof (struct _buf));
401 401 }
402 402
403 403 int
404 404 kobj_get_filesize(struct _buf *file, uint64_t *size)
405 405 {
406 406 struct stat64 st;
407 407 vnode_t *vp = (vnode_t *)file->_fd;
408 408
409 409 if (fstat64(vp->v_fd, &st) == -1) {
410 410 vn_close(vp);
411 411 return (errno);
412 412 }
413 413 *size = st.st_size;
414 414 return (0);
415 415 }
416 416
417 417 /*
418 418 * =========================================================================
419 419 * kernel emulation setup & teardown
420 420 * =========================================================================
421 421 */
/*
 * Callback registered with umem_nofail_callback(): report the condition
 * on stderr and abort so that a core dump is produced.  The return
 * statement is never reached; it only satisfies the callback's type.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] =
	    "out of memory -- generating core dump\n";

	/* sizeof - 1: write the text only, not the terminating NUL. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
431 431
432 432 void
433 433 kernel_init(int mode)
434 434 {
435 435 extern uint_t rrw_tsd_key;
436 436
437 437 umem_nofail_callback(umem_out_of_memory);
438 438
439 439 physmem = sysconf(_SC_PHYS_PAGES);
440 440
441 441 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
442 442 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
443 443
444 444 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
445 445 (mode & FWRITE) ? gethostid() : 0);
446 446
447 447 system_taskq_init();
448 448
449 449 mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
450 450
451 451 spa_init(mode);
452 452
453 453 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
|
↓ open down ↓ |
391 lines elided |
↑ open up ↑ |
454 454 }
455 455
/*
 * Tear down the emulated kernel environment: shut down the SPA layer
 * first, then the system taskqs.
 */
void
kernel_fini(void)
{
	spa_fini();
	system_taskq_fini();
}
463 463
464 -/* ARGSUSED */
465 -uint32_t
466 -zone_get_hostid(void *zonep)
467 -{
468 - /*
469 - * We're emulating the system's hostid in userland.
470 - */
471 - return (strtoul(hw_serial, NULL, 10));
472 -}
473 -
474 464 int
475 465 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
476 466 {
477 467 int ret;
478 468 uLongf len = *dstlen;
479 469
480 470 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
481 471 *dstlen = (size_t)len;
482 472
483 473 return (ret);
484 474 }
485 475
486 476 int
487 477 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
488 478 int level)
489 479 {
490 480 int ret;
491 481 uLongf len = *dstlen;
492 482
493 483 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
494 484 *dstlen = (size_t)len;
495 485
496 486 return (ret);
497 487 }
498 488
499 489 int
500 490 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
501 491 {
502 492 return (0);
503 493 }
504 494
505 495 int
506 496 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
507 497 {
508 498 return (0);
509 499 }
510 500
511 501 int
512 502 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
513 503 {
514 504 return (0);
515 505 }
516 506
517 507 /* ARGSUSED */
518 508 int
519 509 zfs_onexit_fd_hold(int fd, minor_t *minorp)
520 510 {
521 511 *minorp = 0;
522 512 return (0);
523 513 }
524 514
/*
 * Userland stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}
530 520
531 521 /* ARGSUSED */
532 522 int
533 523 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
534 524 uint64_t *action_handle)
535 525 {
536 526 return (0);
537 527 }
538 528
539 529 /* ARGSUSED */
540 530 int
541 531 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
542 532 {
543 533 return (0);
544 534 }
545 535
546 536 /* ARGSUSED */
547 537 int
548 538 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
549 539 {
550 540 return (0);
551 541 }
552 542
553 543 void
554 544 bioinit(buf_t *bp)
555 545 {
556 546 bzero(bp, sizeof (buf_t));
557 547 }
558 548
559 549 void
560 550 biodone(buf_t *bp)
561 551 {
562 552 if (bp->b_iodone != NULL) {
563 553 (*(bp->b_iodone))(bp);
564 554 return;
565 555 }
566 556 ASSERT((bp->b_flags & B_DONE) == 0);
567 557 bp->b_flags |= B_DONE;
568 558 }
569 559
570 560 void
571 561 bioerror(buf_t *bp, int error)
572 562 {
573 563 ASSERT(bp != NULL);
574 564 ASSERT(error >= 0);
575 565
576 566 if (error != 0) {
577 567 bp->b_flags |= B_ERROR;
578 568 } else {
579 569 bp->b_flags &= ~B_ERROR;
580 570 }
581 571 bp->b_error = error;
582 572 }
583 573
584 574
585 575 int
586 576 geterror(struct buf *bp)
587 577 {
588 578 int error = 0;
589 579
590 580 if (bp->b_flags & B_ERROR) {
591 581 error = bp->b_error;
592 582 if (!error)
593 583 error = EIO;
594 584 }
595 585 return (error);
596 586 }
|
↓ open down ↓ |
113 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX