Print this page
Fixup merge results
re #12393 rb3935 Kerberos and smbd disagree about who is our AD server (fix elf runtime attributes check)
re #11612 rb3907 Failing vdev of a mirrored pool should not take zfs operations out of action for extended periods of time.


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2012 by Delphix. All rights reserved.

  28  */
  29 
  30 #include <sys/spa.h>
  31 #include <sys/spa_impl.h>
  32 #include <sys/vdev.h>
  33 #include <sys/vdev_impl.h>
  34 #include <sys/zio.h>
  35 #include <sys/zio_checksum.h>
  36 
  37 #include <sys/fm/fs/zfs.h>
  38 #include <sys/fm/protocol.h>
  39 #include <sys/fm/util.h>
  40 #include <sys/sysevent.h>
  41 
  42 /*
  43  * This general routine is responsible for generating all the different ZFS
  44  * ereports.  The payload is dependent on the class, and which arguments are
  45  * supplied to the function:
  46  *
  47  *      EREPORT                 POOL    VDEV    IO


 288                         if (pvd->vdev_devid)
 289                                 fm_payload_set(ereport,
 290                                     FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
 291                                     DATA_TYPE_STRING, pvd->vdev_devid, NULL);
 292                 }
 293         }
 294 
 295         if (zio != NULL) {
 296                 /*
 297                  * Payload common to all I/Os.
 298                  */
 299                 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
 300                     DATA_TYPE_INT32, zio->io_error, NULL);
 301 
 302                 /*
 303                  * If the 'size' parameter is non-zero, it indicates this is a
 304                  * RAID-Z or other I/O where the physical offset and length are
 305                  * provided for us, instead of within the zio_t.
 306                  */
 307                 if (vd != NULL) {
 308                         if (size)





 309                                 fm_payload_set(ereport,






 310                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 311                                     DATA_TYPE_UINT64, stateoroffset,
 312                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 313                                     DATA_TYPE_UINT64, size, NULL);
 314                         else
 315                                 fm_payload_set(ereport,
 316                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 317                                     DATA_TYPE_UINT64, zio->io_offset,
 318                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 319                                     DATA_TYPE_UINT64, zio->io_size, NULL);
 320                 }
 321 
 322                 /*
 323                  * Payload for I/Os with corresponding logical information.
 324                  */
 325                 if (zio->io_logical != NULL)
 326                         fm_payload_set(ereport,
 327                             FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
 328                             DATA_TYPE_UINT64,
 329                             zio->io_logical->io_bookmark.zb_objset,


 343                  * vdev.
 344                  */
 345                 fm_payload_set(ereport,
 346                     FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
 347                     DATA_TYPE_UINT64, stateoroffset, NULL);
 348         }
 349 
 350         mutex_exit(&spa->spa_errlist_lock);
 351 
 352         *ereport_out = ereport;
 353         *detector_out = detector;
 354 }
 355 
 356 /* Corruption of <= 128 bytes is saved directly; the limit is counted in uint64_t words. */
 357 #define ZFM_MAX_INLINE          (128 / sizeof (uint64_t))
 358 
 359 #define MAX_RANGES              16      /* capacity of each per-range array in zfs_ecksum_info */
 360 
 361 typedef struct zfs_ecksum_info {
 362         /* histograms of set and cleared bits, one counter per bit position of a 64-bit word */
 363         uint32_t zei_histogram_set[sizeof (uint64_t) * NBBY];
 364         uint32_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
 365 
 366         /* inline arrays of the bits set and cleared, ZFM_MAX_INLINE 64-bit words each */
 367         uint64_t zei_bits_set[ZFM_MAX_INLINE];
 368         uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
 369 
 370         /*
 371          * For each range, the number of bits set and cleared.  The Hamming
 372          * distance between the good and bad buffers is the sum of them all.
 373          */
 374         uint32_t zei_range_sets[MAX_RANGES];
 375         uint32_t zei_range_clears[MAX_RANGES];
 376 
 377         struct zei_ranges {
 378                 uint32_t        zr_start;
 379                 uint32_t        zr_end;
 380         } zei_ranges[MAX_RANGES];
 381 
 382         size_t  zei_range_count;        /* element count passed with the range sets/clears payload arrays */
 383         uint32_t zei_mingap;            /* smallest distance between range entries */
 384         uint32_t zei_allowed_mingap;    /* reported as the BAD_RANGE_MIN_GAP payload */
 385 
 386 } zfs_ecksum_info_t;
 387 
 388 static void
 389 update_histogram(uint64_t value_arg, uint32_t *hist, uint32_t *count)
 390 {
             /*
              * Tally the set bits of one 64-bit word into a per-bit histogram.
              *
              * value_arg: word whose set bits are tallied (after BE_64 conversion;
              *            presumably a host-to-big-endian swap -- confirm against
              *            the macro's definition).
              * hist:      counter array indexed 0..63; must hold at least 64 entries.
              * count:     running total, incremented by the number of set bits.
              */
 391         size_t i;
 392         size_t bits = 0;
 393         uint64_t value = BE_64(value_arg);
 394 
 395         /* We store the bits in big-endian (largest-first) order */
 396         for (i = 0; i < 64; i++) {
 397                 if (value & (1ull << i)) {
 398                         hist[63 - i]++; /* bit i lands in slot 63 - i, so the top bit is slot 0 */
 399                         ++bits;
 400                 }
 401         }
 402         /* update the count of bits changed */
 403         *count += bits;
 404 }
 405 
 406 /*
 407  * We've now filled up the range array, and need to increase "mingap" and
 408  * shrink the range list accordingly.  zei_mingap is always the smallest
 409  * distance between array entries, so we set the new_allowed_gap to be


 535                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
 536                     DATA_TYPE_UINT64_ARRAY,
 537                     sizeof (info->zbc_actual) / sizeof (uint64_t),
 538                     (uint64_t *)&info->zbc_actual,
 539                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
 540                     DATA_TYPE_STRING,
 541                     info->zbc_checksum_name,
 542                     NULL);
 543 
 544                 if (info->zbc_byteswapped) {
 545                         fm_payload_set(ereport,
 546                             FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP,
 547                             DATA_TYPE_BOOLEAN, 1,
 548                             NULL);
 549                 }
 550         }
 551 
 552         if (badbuf == NULL || goodbuf == NULL)
 553                 return (eip);
 554 
 555         ASSERT3U(nui64s, <=, UINT32_MAX);
 556         ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
 557         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 558         ASSERT3U(size, <=, UINT32_MAX);
 559 
 560         /* build up the range list by comparing the two buffers. */
 561         for (idx = 0; idx < nui64s; idx++) {
 562                 if (good[idx] == bad[idx]) {
 563                         if (start == -1)
 564                                 continue;
 565 
 566                         add_range(eip, start, idx);
 567                         start = -1;
 568                 } else {
 569                         if (start != -1)
 570                                 continue;
 571 
 572                         start = idx;
 573                 }
 574         }
 575         if (start != -1)


 637             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP,
 638             DATA_TYPE_UINT32, eip->zei_allowed_mingap,
 639             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS,
 640             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets,
 641             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS,
 642             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears,
 643             NULL);
 644 
 645         if (!no_inline) {
 646                 fm_payload_set(ereport,
 647                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS,
 648                     DATA_TYPE_UINT8_ARRAY,
 649                     inline_size, (uint8_t *)eip->zei_bits_set,
 650                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS,
 651                     DATA_TYPE_UINT8_ARRAY,
 652                     inline_size, (uint8_t *)eip->zei_bits_cleared,
 653                     NULL);
 654         } else {
 655                 fm_payload_set(ereport,
 656                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
 657                     DATA_TYPE_UINT32_ARRAY,
 658                     NBBY * sizeof (uint64_t), eip->zei_histogram_set,
 659                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
 660                     DATA_TYPE_UINT32_ARRAY,
 661                     NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
 662                     NULL);
 663         }
 664         return (eip);
 665 }
 666 #endif
 667 
 668 void
 669 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
 670     uint64_t stateoroffset, uint64_t size)
 671 {
 672 #ifdef _KERNEL
 673         nvlist_t *ereport = NULL;
 674         nvlist_t *detector = NULL;
 675 
 676         zfs_ereport_start(&ereport, &detector,
 677             subclass, spa, vd, zio, stateoroffset, size);
 678 
 679         if (ereport == NULL)
 680                 return;




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.



  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/spa.h>
  29 #include <sys/spa_impl.h>
  30 #include <sys/vdev.h>
  31 #include <sys/vdev_impl.h>
  32 #include <sys/zio.h>
  33 #include <sys/zio_checksum.h>
  34 
  35 #include <sys/fm/fs/zfs.h>
  36 #include <sys/fm/protocol.h>
  37 #include <sys/fm/util.h>
  38 #include <sys/sysevent.h>
  39 
  40 /*
  41  * This general routine is responsible for generating all the different ZFS
  42  * ereports.  The payload is dependent on the class, and which arguments are
  43  * supplied to the function:
  44  *
  45  *      EREPORT                 POOL    VDEV    IO


 286                         if (pvd->vdev_devid)
 287                                 fm_payload_set(ereport,
 288                                     FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
 289                                     DATA_TYPE_STRING, pvd->vdev_devid, NULL);
 290                 }
 291         }
 292 
 293         if (zio != NULL) {
 294                 /*
 295                  * Payload common to all I/Os.
 296                  */
 297                 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
 298                     DATA_TYPE_INT32, zio->io_error, NULL);
 299 
 300                 /*
 301                  * If the 'size' parameter is non-zero, it indicates this is a
 302                  * RAID-Z or other I/O where the physical offset and length are
 303                  * provided for us, instead of within the zio_t.
 304                  */
 305                 if (vd != NULL) {
 306                         /*
 307                          * The 'stateoroffset' and 'size' parameters are
 308                          * overloaded to represent the timeout and latency,
 309                          * respectively, in a timeout report.
 310                          */
 311                         if (strcmp(subclass, FM_EREPORT_ZFS_TIMEOUT) == 0)
 312                                 fm_payload_set(ereport,
 313                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMEOUT,
 314                                     DATA_TYPE_UINT64, stateoroffset,
 315                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_LATENCY,
 316                                     DATA_TYPE_UINT64, size, NULL);
 317                         else if (size)
 318                                 fm_payload_set(ereport,
 319                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 320                                     DATA_TYPE_UINT64, stateoroffset,
 321                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 322                                     DATA_TYPE_UINT64, size, NULL);
 323                         else
 324                                 fm_payload_set(ereport,
 325                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 326                                     DATA_TYPE_UINT64, zio->io_offset,
 327                                     FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 328                                     DATA_TYPE_UINT64, zio->io_size, NULL);
 329                 }
 330 
 331                 /*
 332                  * Payload for I/Os with corresponding logical information.
 333                  */
 334                 if (zio->io_logical != NULL)
 335                         fm_payload_set(ereport,
 336                             FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
 337                             DATA_TYPE_UINT64,
 338                             zio->io_logical->io_bookmark.zb_objset,


 352                  * vdev.
 353                  */
 354                 fm_payload_set(ereport,
 355                     FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
 356                     DATA_TYPE_UINT64, stateoroffset, NULL);
 357         }
 358 
 359         mutex_exit(&spa->spa_errlist_lock);
 360 
 361         *ereport_out = ereport;
 362         *detector_out = detector;
 363 }
 364 
 365 /* Corruption of <= 128 bytes is saved directly; the limit is counted in uint64_t words. */
 366 #define ZFM_MAX_INLINE          (128 / sizeof (uint64_t))
 367 
 368 #define MAX_RANGES              16      /* capacity of each per-range array in zfs_ecksum_info */
 369 
 370 typedef struct zfs_ecksum_info {
 371         /* histograms of set and cleared bits, one 16-bit counter per bit position of a 64-bit word */
 372         uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY];
 373         uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
 374 
 375         /* inline arrays of the bits set and cleared, ZFM_MAX_INLINE 64-bit words each */
 376         uint64_t zei_bits_set[ZFM_MAX_INLINE];
 377         uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
 378 
 379         /*
 380          * For each range, the number of bits set and cleared.  The Hamming
 381          * distance between the good and bad buffers is the sum of them all.
 382          */
 383         uint32_t zei_range_sets[MAX_RANGES];
 384         uint32_t zei_range_clears[MAX_RANGES];
 385 
 386         struct zei_ranges {
 387                 uint32_t        zr_start;
 388                 uint32_t        zr_end;
 389         } zei_ranges[MAX_RANGES];
 390 
 391         size_t  zei_range_count;        /* element count passed with the range sets/clears payload arrays */
 392         uint32_t zei_mingap;            /* smallest distance between range entries */
 393         uint32_t zei_allowed_mingap;    /* reported as the BAD_RANGE_MIN_GAP payload */
 394 
 395 } zfs_ecksum_info_t;
 396 
 397 static void
 398 update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
 399 {
             /*
              * Tally the set bits of one 64-bit word into a per-bit histogram.
              *
              * value_arg: word whose set bits are tallied (after BE_64 conversion;
              *            presumably a host-to-big-endian swap -- confirm against
              *            the macro's definition).
              * hist:      16-bit counter array indexed 0..63; must hold at least
              *            64 entries.
              * count:     running total, incremented by the number of set bits.
              *
              * NOTE(review): each slot grows by at most one per call, and a
              * uint16_t slot wraps past UINT16_MAX.  The
              * ASSERT3U(nui64s, <=, UINT16_MAX) elsewhere in this file appears
              * to bound the number of calls -- confirm against the caller.
              */
 400         size_t i;
 401         size_t bits = 0;
 402         uint64_t value = BE_64(value_arg);
 403 
 404         /* We store the bits in big-endian (largest-first) order */
 405         for (i = 0; i < 64; i++) {
 406                 if (value & (1ull << i)) {
 407                         hist[63 - i]++; /* bit i lands in slot 63 - i, so the top bit is slot 0 */
 408                         ++bits;
 409                 }
 410         }
 411         /* update the count of bits changed */
 412         *count += bits;
 413 }
 414 
 415 /*
 416  * We've now filled up the range array, and need to increase "mingap" and
 417  * shrink the range list accordingly.  zei_mingap is always the smallest
 418  * distance between array entries, so we set the new_allowed_gap to be


 544                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
 545                     DATA_TYPE_UINT64_ARRAY,
 546                     sizeof (info->zbc_actual) / sizeof (uint64_t),
 547                     (uint64_t *)&info->zbc_actual,
 548                     FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
 549                     DATA_TYPE_STRING,
 550                     info->zbc_checksum_name,
 551                     NULL);
 552 
 553                 if (info->zbc_byteswapped) {
 554                         fm_payload_set(ereport,
 555                             FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP,
 556                             DATA_TYPE_BOOLEAN, 1,
 557                             NULL);
 558                 }
 559         }
 560 
 561         if (badbuf == NULL || goodbuf == NULL)
 562                 return (eip);
 563 
 564         ASSERT3U(nui64s, <=, UINT16_MAX);
 565         ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
 566         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 567         ASSERT3U(size, <=, UINT32_MAX);
 568 
 569         /* build up the range list by comparing the two buffers. */
 570         for (idx = 0; idx < nui64s; idx++) {
 571                 if (good[idx] == bad[idx]) {
 572                         if (start == -1)
 573                                 continue;
 574 
 575                         add_range(eip, start, idx);
 576                         start = -1;
 577                 } else {
 578                         if (start != -1)
 579                                 continue;
 580 
 581                         start = idx;
 582                 }
 583         }
 584         if (start != -1)


 646             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP,
 647             DATA_TYPE_UINT32, eip->zei_allowed_mingap,
 648             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS,
 649             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets,
 650             FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS,
 651             DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears,
 652             NULL);
 653 
 654         if (!no_inline) {
 655                 fm_payload_set(ereport,
 656                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS,
 657                     DATA_TYPE_UINT8_ARRAY,
 658                     inline_size, (uint8_t *)eip->zei_bits_set,
 659                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS,
 660                     DATA_TYPE_UINT8_ARRAY,
 661                     inline_size, (uint8_t *)eip->zei_bits_cleared,
 662                     NULL);
 663         } else {
 664                 fm_payload_set(ereport,
 665                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
 666                     DATA_TYPE_UINT16_ARRAY,
 667                     NBBY * sizeof (uint64_t), eip->zei_histogram_set,
 668                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
 669                     DATA_TYPE_UINT16_ARRAY,
 670                     NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
 671                     NULL);
 672         }
 673         return (eip);
 674 }
 675 #endif
 676 
 677 void
 678 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
 679     uint64_t stateoroffset, uint64_t size)
 680 {
 681 #ifdef _KERNEL
 682         nvlist_t *ereport = NULL;
 683         nvlist_t *detector = NULL;
 684 
 685         zfs_ereport_start(&ereport, &detector,
 686             subclass, spa, vd, zio, stateoroffset, size);
 687 
 688         if (ereport == NULL)
 689                 return;