1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012 by Delphix. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  25  */
  26 
  27 #include <assert.h>
  28 #include <fcntl.h>
  29 #include <poll.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <zlib.h>
  34 #include <sys/spa.h>
  35 #include <sys/stat.h>
  36 #include <sys/processor.h>
  37 #include <sys/zfs_context.h>
  38 #include <sys/rrwlock.h>
  39 #include <sys/zmod.h>
  40 #include <sys/utsname.h>
  41 #include <sys/systeminfo.h>
  42 
  43 /*
  44  * Emulation of kernel services in userland.
  45  */
  46 
  47 int aok;
  48 uint64_t physmem;
  49 vnode_t *rootdir = (vnode_t *)0xabcd1234;
  50 char hw_serial[HW_HOSTID_LEN];
  51 kmutex_t cpu_lock;
  52 vmem_t *zio_arena = NULL;
  53 
  54 struct utsname utsname = {
  55         "userland", "libzpool", "1", "1", "na"
  56 };
  57 
  58 /* this only exists to have its address taken */
  59 struct proc p0;
  60 
  61 /*
  62  * =========================================================================
  63  * threads
  64  * =========================================================================
  65  */
  66 /*ARGSUSED*/
  67 kthread_t *
  68 zk_thread_create(void (*func)(), void *arg)
  69 {
  70         thread_t tid;
  71 
  72         VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
  73             &tid) == 0);
  74 
  75         return ((void *)(uintptr_t)tid);
  76 }
  77 
  78 /*
  79  * =========================================================================
  80  * kstats
  81  * =========================================================================
  82  */
  83 /*ARGSUSED*/
  84 kstat_t *
  85 kstat_create(const char *module, int instance, const char *name,
  86     const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
  87 {
  88         return (NULL);
  89 }
  90 
  91 /*ARGSUSED*/
  92 void
  93 kstat_install(kstat_t *ksp)
  94 {}
  95 
  96 /*ARGSUSED*/
  97 void
  98 kstat_delete(kstat_t *ksp)
  99 {}
 100 
 101 /*ARGSUSED*/
 102 void
 103 kstat_waitq_enter(kstat_io_t *kiop)
 104 {}
 105 
 106 /*ARGSUSED*/
 107 void
 108 kstat_waitq_exit(kstat_io_t *kiop)
 109 {}
 110 
 111 /*ARGSUSED*/
 112 void
 113 kstat_runq_enter(kstat_io_t *kiop)
 114 {}
 115 
 116 /*ARGSUSED*/
 117 void
 118 kstat_runq_exit(kstat_io_t *kiop)
 119 {}
 120 
 121 /*ARGSUSED*/
 122 void
 123 kstat_waitq_to_runq(kstat_io_t *kiop)
 124 {}
 125 
 126 /*ARGSUSED*/
 127 void
 128 kstat_runq_back_to_waitq(kstat_io_t *kiop)
 129 {}
 130 
 131 /*
 132  * =========================================================================
 133  * mutexes
 134  * =========================================================================
 135  */
 136 void
 137 zmutex_init(kmutex_t *mp)
 138 {
 139         mp->m_owner = NULL;
 140         mp->initialized = B_TRUE;
 141         (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
 142 }
 143 
 144 void
 145 zmutex_destroy(kmutex_t *mp)
 146 {
 147         ASSERT(mp->initialized == B_TRUE);
 148         ASSERT(mp->m_owner == NULL);
 149         (void) _mutex_destroy(&(mp)->m_lock);
 150         mp->m_owner = (void *)-1UL;
 151         mp->initialized = B_FALSE;
 152 }
 153 
 154 void
 155 mutex_enter(kmutex_t *mp)
 156 {
 157         ASSERT(mp->initialized == B_TRUE);
 158         ASSERT(mp->m_owner != (void *)-1UL);
 159         ASSERT(mp->m_owner != curthread);
 160         VERIFY(mutex_lock(&mp->m_lock) == 0);
 161         ASSERT(mp->m_owner == NULL);
 162         mp->m_owner = curthread;
 163 }
 164 
 165 int
 166 mutex_tryenter(kmutex_t *mp)
 167 {
 168         ASSERT(mp->initialized == B_TRUE);
 169         ASSERT(mp->m_owner != (void *)-1UL);
 170         if (0 == mutex_trylock(&mp->m_lock)) {
 171                 ASSERT(mp->m_owner == NULL);
 172                 mp->m_owner = curthread;
 173                 return (1);
 174         } else {
 175                 return (0);
 176         }
 177 }
 178 
 179 void
 180 mutex_exit(kmutex_t *mp)
 181 {
 182         ASSERT(mp->initialized == B_TRUE);
 183         ASSERT(mutex_owner(mp) == curthread);
 184         mp->m_owner = NULL;
 185         VERIFY(mutex_unlock(&mp->m_lock) == 0);
 186 }
 187 
 188 void *
 189 mutex_owner(kmutex_t *mp)
 190 {
 191         ASSERT(mp->initialized == B_TRUE);
 192         return (mp->m_owner);
 193 }
 194 
 195 /*
 196  * =========================================================================
 197  * rwlocks
 198  * =========================================================================
 199  */
 200 /*ARGSUSED*/
 201 void
 202 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
 203 {
 204         rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
 205         rwlp->rw_owner = NULL;
 206         rwlp->initialized = B_TRUE;
 207 }
 208 
 209 void
 210 rw_destroy(krwlock_t *rwlp)
 211 {
 212         rwlock_destroy(&rwlp->rw_lock);
 213         rwlp->rw_owner = (void *)-1UL;
 214         rwlp->initialized = B_FALSE;
 215 }
 216 
 217 void
 218 rw_enter(krwlock_t *rwlp, krw_t rw)
 219 {
 220         ASSERT(!RW_LOCK_HELD(rwlp));
 221         ASSERT(rwlp->initialized == B_TRUE);
 222         ASSERT(rwlp->rw_owner != (void *)-1UL);
 223         ASSERT(rwlp->rw_owner != curthread);
 224 
 225         if (rw == RW_WRITER)
 226                 VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
 227         else
 228                 VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
 229 
 230         rwlp->rw_owner = curthread;
 231 }
 232 
 233 void
 234 rw_exit(krwlock_t *rwlp)
 235 {
 236         ASSERT(rwlp->initialized == B_TRUE);
 237         ASSERT(rwlp->rw_owner != (void *)-1UL);
 238 
 239         rwlp->rw_owner = NULL;
 240         VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
 241 }
 242 
 243 int
 244 rw_tryenter(krwlock_t *rwlp, krw_t rw)
 245 {
 246         int rv;
 247 
 248         ASSERT(rwlp->initialized == B_TRUE);
 249         ASSERT(rwlp->rw_owner != (void *)-1UL);
 250 
 251         if (rw == RW_WRITER)
 252                 rv = rw_trywrlock(&rwlp->rw_lock);
 253         else
 254                 rv = rw_tryrdlock(&rwlp->rw_lock);
 255 
 256         if (rv == 0) {
 257                 rwlp->rw_owner = curthread;
 258                 return (1);
 259         }
 260 
 261         return (0);
 262 }
 263 
 264 /*ARGSUSED*/
 265 int
 266 rw_tryupgrade(krwlock_t *rwlp)
 267 {
 268         ASSERT(rwlp->initialized == B_TRUE);
 269         ASSERT(rwlp->rw_owner != (void *)-1UL);
 270 
 271         return (0);
 272 }
 273 
 274 /*
 275  * =========================================================================
 276  * condition variables
 277  * =========================================================================
 278  */
 279 /*ARGSUSED*/
 280 void
 281 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
 282 {
 283         VERIFY(cond_init(cv, type, NULL) == 0);
 284 }
 285 
 286 void
 287 cv_destroy(kcondvar_t *cv)
 288 {
 289         VERIFY(cond_destroy(cv) == 0);
 290 }
 291 
 292 void
 293 cv_wait(kcondvar_t *cv, kmutex_t *mp)
 294 {
 295         ASSERT(mutex_owner(mp) == curthread);
 296         mp->m_owner = NULL;
 297         int ret = cond_wait(cv, &mp->m_lock);
 298         VERIFY(ret == 0 || ret == EINTR);
 299         mp->m_owner = curthread;
 300 }
 301 
 302 clock_t
 303 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
 304 {
 305         int error;
 306         timestruc_t ts;
 307         clock_t delta;
 308 
 309 top:
 310         delta = abstime - ddi_get_lbolt();
 311         if (delta <= 0)
 312                 return (-1);
 313 
 314         ts.tv_sec = delta / hz;
 315         ts.tv_nsec = (delta % hz) * (NANOSEC / hz);
 316 
 317         ASSERT(mutex_owner(mp) == curthread);
 318         mp->m_owner = NULL;
 319         error = cond_reltimedwait(cv, &mp->m_lock, &ts);
 320         mp->m_owner = curthread;
 321 
 322         if (error == ETIME)
 323                 return (-1);
 324 
 325         if (error == EINTR)
 326                 goto top;
 327 
 328         ASSERT(error == 0);
 329 
 330         return (1);
 331 }
 332 
 333 /*ARGSUSED*/
 334 clock_t
 335 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
 336     int flag)
 337 {
 338         int error;
 339         timestruc_t ts;
 340         hrtime_t delta;
 341 
 342         ASSERT(flag == 0);
 343 
 344 top:
 345         delta = tim - gethrtime();
 346         if (delta <= 0)
 347                 return (-1);
 348 
 349         ts.tv_sec = delta / NANOSEC;
 350         ts.tv_nsec = delta % NANOSEC;
 351 
 352         ASSERT(mutex_owner(mp) == curthread);
 353         mp->m_owner = NULL;
 354         error = cond_reltimedwait(cv, &mp->m_lock, &ts);
 355         mp->m_owner = curthread;
 356 
 357         if (error == ETIME)
 358                 return (-1);
 359 
 360         if (error == EINTR)
 361                 goto top;
 362 
 363         ASSERT(error == 0);
 364 
 365         return (1);
 366 }
 367 
 368 void
 369 cv_signal(kcondvar_t *cv)
 370 {
 371         VERIFY(cond_signal(cv) == 0);
 372 }
 373 
 374 void
 375 cv_broadcast(kcondvar_t *cv)
 376 {
 377         VERIFY(cond_broadcast(cv) == 0);
 378 }
 379 
 380 /*
 381  * =========================================================================
 382  * vnode operations
 383  * =========================================================================
 384  */
 385 /*
 386  * Note: for the xxxat() versions of these functions, we assume that the
 387  * starting vp is always rootdir (which is true for spa_directory.c, the only
 388  * ZFS consumer of these interfaces).  We assert this is true, and then emulate
 389  * them by adding '/' in front of the path.
 390  */
 391 
 392 /*ARGSUSED*/
 393 int
 394 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 395 {
 396         int fd;
 397         vnode_t *vp;
 398         int old_umask;
 399         char realpath[MAXPATHLEN];
 400         struct stat64 st;
 401 
 402         /*
 403          * If we're accessing a real disk from userland, we need to use
 404          * the character interface to avoid caching.  This is particularly
 405          * important if we're trying to look at a real in-kernel storage
 406          * pool from userland, e.g. via zdb, because otherwise we won't
 407          * see the changes occurring under the segmap cache.
 408          * On the other hand, the stupid character device returns zero
 409          * for its size.  So -- gag -- we open the block device to get
 410          * its size, and remember it for subsequent VOP_GETATTR().
 411          */
 412         if (strncmp(path, "/dev/", 5) == 0) {
 413                 char *dsk;
 414                 fd = open64(path, O_RDONLY);
 415                 if (fd == -1)
 416                         return (errno);
 417                 if (fstat64(fd, &st) == -1) {
 418                         close(fd);
 419                         return (errno);
 420                 }
 421                 close(fd);
 422                 (void) sprintf(realpath, "%s", path);
 423                 dsk = strstr(path, "/dsk/");
 424                 if (dsk != NULL)
 425                         (void) sprintf(realpath + (dsk - path) + 1, "r%s",
 426                             dsk + 1);
 427         } else {
 428                 (void) sprintf(realpath, "%s", path);
 429                 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
 430                         return (errno);
 431         }
 432 
 433         if (flags & FCREAT)
 434                 old_umask = umask(0);
 435 
 436         /*
 437          * The construct 'flags - FREAD' conveniently maps combinations of
 438          * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
 439          */
 440         fd = open64(realpath, flags - FREAD, mode);
 441 
 442         if (flags & FCREAT)
 443                 (void) umask(old_umask);
 444 
 445         if (fd == -1)
 446                 return (errno);
 447 
 448         if (fstat64(fd, &st) == -1) {
 449                 close(fd);
 450                 return (errno);
 451         }
 452 
 453         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 454 
 455         *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
 456 
 457         vp->v_fd = fd;
 458         vp->v_size = st.st_size;
 459         vp->v_path = spa_strdup(path);
 460 
 461         return (0);
 462 }
 463 
 464 /*ARGSUSED*/
 465 int
 466 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
 467     int x3, vnode_t *startvp, int fd)
 468 {
 469         char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
 470         int ret;
 471 
 472         ASSERT(startvp == rootdir);
 473         (void) sprintf(realpath, "/%s", path);
 474 
 475         /* fd ignored for now, need if want to simulate nbmand support */
 476         ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
 477 
 478         umem_free(realpath, strlen(path) + 2);
 479 
 480         return (ret);
 481 }
 482 
 483 /*ARGSUSED*/
 484 int
 485 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
 486         int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
 487 {
 488         ssize_t iolen, split;
 489 
 490         if (uio == UIO_READ) {
 491                 iolen = pread64(vp->v_fd, addr, len, offset);
 492         } else {
 493                 /*
 494                  * To simulate partial disk writes, we split writes into two
 495                  * system calls so that the process can be killed in between.
 496                  */
 497                 int sectors = len >> SPA_MINBLOCKSHIFT;
 498                 split = (sectors > 0 ? rand() % sectors : 0) <<
 499                     SPA_MINBLOCKSHIFT;
 500                 iolen = pwrite64(vp->v_fd, addr, split, offset);
 501                 iolen += pwrite64(vp->v_fd, (char *)addr + split,
 502                     len - split, offset + split);
 503         }
 504 
 505         if (iolen == -1)
 506                 return (errno);
 507         if (residp)
 508                 *residp = len - iolen;
 509         else if (iolen != len)
 510                 return (EIO);
 511         return (0);
 512 }
 513 
 514 void
 515 vn_close(vnode_t *vp)
 516 {
 517         close(vp->v_fd);
 518         spa_strfree(vp->v_path);
 519         umem_free(vp, sizeof (vnode_t));
 520 }
 521 
 522 /*
 523  * At a minimum we need to update the size since vdev_reopen()
 524  * will no longer call vn_openat().
 525  */
 526 int
 527 fop_getattr(vnode_t *vp, vattr_t *vap)
 528 {
 529         struct stat64 st;
 530 
 531         if (fstat64(vp->v_fd, &st) == -1) {
 532                 close(vp->v_fd);
 533                 return (errno);
 534         }
 535 
 536         vap->va_size = st.st_size;
 537         return (0);
 538 }
 539 
 540 #ifdef ZFS_DEBUG
 541 
 542 /*
 543  * =========================================================================
 544  * Figure out which debugging statements to print
 545  * =========================================================================
 546  */
 547 
 548 static char *dprintf_string;
 549 static int dprintf_print_all;
 550 
 551 int
 552 dprintf_find_string(const char *string)
 553 {
 554         char *tmp_str = dprintf_string;
 555         int len = strlen(string);
 556 
 557         /*
 558          * Find out if this is a string we want to print.
 559          * String format: file1.c,function_name1,file2.c,file3.c
 560          */
 561 
 562         while (tmp_str != NULL) {
 563                 if (strncmp(tmp_str, string, len) == 0 &&
 564                     (tmp_str[len] == ',' || tmp_str[len] == '\0'))
 565                         return (1);
 566                 tmp_str = strchr(tmp_str, ',');
 567                 if (tmp_str != NULL)
 568                         tmp_str++; /* Get rid of , */
 569         }
 570         return (0);
 571 }
 572 
 573 void
 574 dprintf_setup(int *argc, char **argv)
 575 {
 576         int i, j;
 577 
 578         /*
 579          * Debugging can be specified two ways: by setting the
 580          * environment variable ZFS_DEBUG, or by including a
 581          * "debug=..."  argument on the command line.  The command
 582          * line setting overrides the environment variable.
 583          */
 584 
 585         for (i = 1; i < *argc; i++) {
 586                 int len = strlen("debug=");
 587                 /* First look for a command line argument */
 588                 if (strncmp("debug=", argv[i], len) == 0) {
 589                         dprintf_string = argv[i] + len;
 590                         /* Remove from args */
 591                         for (j = i; j < *argc; j++)
 592                                 argv[j] = argv[j+1];
 593                         argv[j] = NULL;
 594                         (*argc)--;
 595                 }
 596         }
 597 
 598         if (dprintf_string == NULL) {
 599                 /* Look for ZFS_DEBUG environment variable */
 600                 dprintf_string = getenv("ZFS_DEBUG");
 601         }
 602 
 603         /*
 604          * Are we just turning on all debugging?
 605          */
 606         if (dprintf_find_string("on"))
 607                 dprintf_print_all = 1;
 608 }
 609 
 610 /*
 611  * =========================================================================
 612  * debug printfs
 613  * =========================================================================
 614  */
 615 void
 616 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 617 {
 618         const char *newfile;
 619         va_list adx;
 620 
 621         /*
 622          * Get rid of annoying "../common/" prefix to filename.
 623          */
 624         newfile = strrchr(file, '/');
 625         if (newfile != NULL) {
 626                 newfile = newfile + 1; /* Get rid of leading / */
 627         } else {
 628                 newfile = file;
 629         }
 630 
 631         if (dprintf_print_all ||
 632             dprintf_find_string(newfile) ||
 633             dprintf_find_string(func)) {
 634                 /* Print out just the function name if requested */
 635                 flockfile(stdout);
 636                 if (dprintf_find_string("pid"))
 637                         (void) printf("%d ", getpid());
 638                 if (dprintf_find_string("tid"))
 639                         (void) printf("%u ", thr_self());
 640                 if (dprintf_find_string("cpu"))
 641                         (void) printf("%u ", getcpuid());
 642                 if (dprintf_find_string("time"))
 643                         (void) printf("%llu ", gethrtime());
 644                 if (dprintf_find_string("long"))
 645                         (void) printf("%s, line %d: ", newfile, line);
 646                 (void) printf("%s: ", func);
 647                 va_start(adx, fmt);
 648                 (void) vprintf(fmt, adx);
 649                 va_end(adx);
 650                 funlockfile(stdout);
 651         }
 652 }
 653 
 654 #endif /* ZFS_DEBUG */
 655 
 656 /*
 657  * =========================================================================
 658  * cmn_err() and panic()
 659  * =========================================================================
 660  */
 661 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
 662 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
 663 
 664 void
 665 vpanic(const char *fmt, va_list adx)
 666 {
 667         (void) fprintf(stderr, "error: ");
 668         (void) vfprintf(stderr, fmt, adx);
 669         (void) fprintf(stderr, "\n");
 670 
 671         abort();        /* think of it as a "user-level crash dump" */
 672 }
 673 
 674 void
 675 panic(const char *fmt, ...)
 676 {
 677         va_list adx;
 678 
 679         va_start(adx, fmt);
 680         vpanic(fmt, adx);
 681         va_end(adx);
 682 }
 683 
 684 void
 685 vcmn_err(int ce, const char *fmt, va_list adx)
 686 {
 687         if (ce == CE_PANIC)
 688                 vpanic(fmt, adx);
 689         if (ce != CE_NOTE) {    /* suppress noise in userland stress testing */
 690                 (void) fprintf(stderr, "%s", ce_prefix[ce]);
 691                 (void) vfprintf(stderr, fmt, adx);
 692                 (void) fprintf(stderr, "%s", ce_suffix[ce]);
 693         }
 694 }
 695 
 696 /*PRINTFLIKE2*/
 697 void
 698 cmn_err(int ce, const char *fmt, ...)
 699 {
 700         va_list adx;
 701 
 702         va_start(adx, fmt);
 703         vcmn_err(ce, fmt, adx);
 704         va_end(adx);
 705 }
 706 
 707 /*
 708  * =========================================================================
 709  * kobj interfaces
 710  * =========================================================================
 711  */
 712 struct _buf *
 713 kobj_open_file(char *name)
 714 {
 715         struct _buf *file;
 716         vnode_t *vp;
 717 
 718         /* set vp as the _fd field of the file */
 719         if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
 720             -1) != 0)
 721                 return ((void *)-1UL);
 722 
 723         file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
 724         file->_fd = (intptr_t)vp;
 725         return (file);
 726 }
 727 
 728 int
 729 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
 730 {
 731         ssize_t resid;
 732 
 733         vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
 734             UIO_SYSSPACE, 0, 0, 0, &resid);
 735 
 736         return (size - resid);
 737 }
 738 
 739 void
 740 kobj_close_file(struct _buf *file)
 741 {
 742         vn_close((vnode_t *)file->_fd);
 743         umem_free(file, sizeof (struct _buf));
 744 }
 745 
 746 int
 747 kobj_get_filesize(struct _buf *file, uint64_t *size)
 748 {
 749         struct stat64 st;
 750         vnode_t *vp = (vnode_t *)file->_fd;
 751 
 752         if (fstat64(vp->v_fd, &st) == -1) {
 753                 vn_close(vp);
 754                 return (errno);
 755         }
 756         *size = st.st_size;
 757         return (0);
 758 }
 759 
 760 /*
 761  * =========================================================================
 762  * misc routines
 763  * =========================================================================
 764  */
 765 
 766 void
 767 delay(clock_t ticks)
 768 {
 769         poll(0, 0, ticks * (1000 / hz));
 770 }
 771 
 772 /*
 773  * Find highest one bit set.
 774  *      Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 775  * High order bit is 31 (or 63 in _LP64 kernel).
 776  */
 777 int
 778 highbit(ulong_t i)
 779 {
 780         register int h = 1;
 781 
 782         if (i == 0)
 783                 return (0);
 784 #ifdef _LP64
 785         if (i & 0xffffffff00000000ul) {
 786                 h += 32; i >>= 32;
 787         }
 788 #endif
 789         if (i & 0xffff0000) {
 790                 h += 16; i >>= 16;
 791         }
 792         if (i & 0xff00) {
 793                 h += 8; i >>= 8;
 794         }
 795         if (i & 0xf0) {
 796                 h += 4; i >>= 4;
 797         }
 798         if (i & 0xc) {
 799                 h += 2; i >>= 2;
 800         }
 801         if (i & 0x2) {
 802                 h += 1;
 803         }
 804         return (h);
 805 }
 806 
 807 static int random_fd = -1, urandom_fd = -1;
 808 
 809 static int
 810 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
 811 {
 812         size_t resid = len;
 813         ssize_t bytes;
 814 
 815         ASSERT(fd != -1);
 816 
 817         while (resid != 0) {
 818                 bytes = read(fd, ptr, resid);
 819                 ASSERT3S(bytes, >=, 0);
 820                 ptr += bytes;
 821                 resid -= bytes;
 822         }
 823 
 824         return (0);
 825 }
 826 
 827 int
 828 random_get_bytes(uint8_t *ptr, size_t len)
 829 {
 830         return (random_get_bytes_common(ptr, len, random_fd));
 831 }
 832 
 833 int
 834 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 835 {
 836         return (random_get_bytes_common(ptr, len, urandom_fd));
 837 }
 838 
 839 int
 840 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
 841 {
 842         char *end;
 843 
 844         *result = strtoul(hw_serial, &end, base);
 845         if (*result == 0)
 846                 return (errno);
 847         return (0);
 848 }
 849 
 850 int
 851 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
 852 {
 853         char *end;
 854 
 855         *result = strtoull(str, &end, base);
 856         if (*result == 0)
 857                 return (errno);
 858         return (0);
 859 }
 860 
 861 /* ARGSUSED */
 862 cyclic_id_t
 863 cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
 864 {
 865         return (1);
 866 }
 867 
 868 /* ARGSUSED */
 869 void
 870 cyclic_remove(cyclic_id_t id)
 871 {
 872 }
 873 
 874 /* ARGSUSED */
 875 int
 876 cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
 877 {
 878         return (1);
 879 }
 880 
 881 /*
 882  * =========================================================================
 883  * kernel emulation setup & teardown
 884  * =========================================================================
 885  */
 886 static int
 887 umem_out_of_memory(void)
 888 {
 889         char errmsg[] = "out of memory -- generating core dump\n";
 890 
 891         write(fileno(stderr), errmsg, sizeof (errmsg));
 892         abort();
 893         return (0);
 894 }
 895 
 896 void
 897 kernel_init(int mode)
 898 {
 899         extern uint_t rrw_tsd_key;
 900 
 901         umem_nofail_callback(umem_out_of_memory);
 902 
 903         physmem = sysconf(_SC_PHYS_PAGES);
 904 
 905         dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
 906             (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
 907 
 908         (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
 909             (mode & FWRITE) ? gethostid() : 0);
 910 
 911         VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
 912         VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
 913 
 914         system_taskq_init();
 915 
 916         mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
 917 
 918         spa_init(mode);
 919 
 920         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 921 }
 922 
 923 void
 924 kernel_fini(void)
 925 {
 926         spa_fini();
 927 
 928         system_taskq_fini();
 929 
 930         close(random_fd);
 931         close(urandom_fd);
 932 
 933         random_fd = -1;
 934         urandom_fd = -1;
 935 }
 936 
 937 int
 938 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
 939 {
 940         int ret;
 941         uLongf len = *dstlen;
 942 
 943         if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
 944                 *dstlen = (size_t)len;
 945 
 946         return (ret);
 947 }
 948 
 949 int
 950 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
 951     int level)
 952 {
 953         int ret;
 954         uLongf len = *dstlen;
 955 
 956         if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
 957                 *dstlen = (size_t)len;
 958 
 959         return (ret);
 960 }
 961 
 962 uid_t
 963 crgetuid(cred_t *cr)
 964 {
 965         return (0);
 966 }
 967 
 968 uid_t
 969 crgetruid(cred_t *cr)
 970 {
 971         return (0);
 972 }
 973 
 974 gid_t
 975 crgetgid(cred_t *cr)
 976 {
 977         return (0);
 978 }
 979 
 980 int
 981 crgetngroups(cred_t *cr)
 982 {
 983         return (0);
 984 }
 985 
 986 gid_t *
 987 crgetgroups(cred_t *cr)
 988 {
 989         return (NULL);
 990 }
 991 
 992 int
 993 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 994 {
 995         return (0);
 996 }
 997 
 998 int
 999 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1000 {
1001         return (0);
1002 }
1003 
1004 int
1005 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1006 {
1007         return (0);
1008 }
1009 
1010 ksiddomain_t *
1011 ksid_lookupdomain(const char *dom)
1012 {
1013         ksiddomain_t *kd;
1014 
1015         kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1016         kd->kd_name = spa_strdup(dom);
1017         return (kd);
1018 }
1019 
1020 void
1021 ksiddomain_rele(ksiddomain_t *ksid)
1022 {
1023         spa_strfree(ksid->kd_name);
1024         umem_free(ksid, sizeof (ksiddomain_t));
1025 }
1026 
1027 /*
1028  * Do not change the length of the returned string; it must be freed
1029  * with strfree().
1030  */
1031 char *
1032 kmem_asprintf(const char *fmt, ...)
1033 {
1034         int size;
1035         va_list adx;
1036         char *buf;
1037 
1038         va_start(adx, fmt);
1039         size = vsnprintf(NULL, 0, fmt, adx) + 1;
1040         va_end(adx);
1041 
1042         buf = kmem_alloc(size, KM_SLEEP);
1043 
1044         va_start(adx, fmt);
1045         size = vsnprintf(buf, size, fmt, adx);
1046         va_end(adx);
1047 
1048         return (buf);
1049 }
1050 
1051 /* ARGSUSED */
1052 int
1053 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1054 {
1055         *minorp = 0;
1056         return (0);
1057 }
1058 
/* Userland stub: does nothing; fd is ignored. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
        /* nothing to release */
}
1064 
1065 /* ARGSUSED */
1066 int
1067 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1068     uint64_t *action_handle)
1069 {
1070         return (0);
1071 }
1072 
1073 /* ARGSUSED */
1074 int
1075 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1076 {
1077         return (0);
1078 }
1079 
1080 /* ARGSUSED */
1081 int
1082 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1083 {
1084         return (0);
1085 }
1086 
1087 void
1088 bioinit(buf_t *bp)
1089 {
1090         bzero(bp, sizeof (buf_t));
1091 }
1092 
1093 void
1094 biodone(buf_t *bp)
1095 {
1096         if (bp->b_iodone != NULL) {
1097                 (*(bp->b_iodone))(bp);
1098                 return;
1099         }
1100         ASSERT((bp->b_flags & B_DONE) == 0);
1101         bp->b_flags |= B_DONE;
1102 }
1103 
1104 void
1105 bioerror(buf_t *bp, int error)
1106 {
1107         ASSERT(bp != NULL);
1108         ASSERT(error >= 0);
1109 
1110         if (error != 0) {
1111                 bp->b_flags |= B_ERROR;
1112         } else {
1113                 bp->b_flags &= ~B_ERROR;
1114         }
1115         bp->b_error = error;
1116 }
1117 
1118 
1119 int
1120 geterror(struct buf *bp)
1121 {
1122         int error = 0;
1123 
1124         if (bp->b_flags & B_ERROR) {
1125                 error = bp->b_error;
1126                 if (!error)
1127                         error = EIO;
1128         }
1129         return (error);
1130 }