1 /*
   2  * LZ4 - Fast LZ compression algorithm
   3  * Header File
   4  * Copyright (C) 2011-2013, Yann Collet.
   5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are
   9  * met:
  10  *
  11  *     * Redistributions of source code must retain the above copyright
  12  * notice, this list of conditions and the following disclaimer.
  13  *     * Redistributions in binary form must reproduce the above
  14  * copyright notice, this list of conditions and the following disclaimer
  15  * in the documentation and/or other materials provided with the
  16  * distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * You can contact the author at :
  31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  32  * - LZ4 source repository : http://code.google.com/p/lz4/
  33  */
  34 
  35 #include <sys/zfs_context.h>
  36 
  37 static int real_LZ4_compress(const char *source, char *dest, int isize,
  38     int osize);
  39 static int real_LZ4_uncompress(const char *source, char *dest, int osize);
  40 static int LZ4_compressBound(int isize);
  41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
  42     int isize, int maxOutputSize);
  43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
  44     int isize, int osize);
  45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
  46     int isize, int osize);
  47 
  48 /*ARGSUSED*/
  49 size_t
  50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
  51 {
  52         uint32_t bufsiz;
  53         char *dest = d_start;
  54 
  55         ASSERT(d_len >= sizeof (bufsiz));
  56 
  57         bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
  58             d_len - sizeof (bufsiz));
  59 
  60         /* Signal an error if the compression routine returned zero. */
  61         if (bufsiz == 0)
  62                 return (s_len);
  63 
  64         /*
  65          * Encode the compresed buffer size at the start. We'll need this in
  66          * decompression to counter the effects of padding which might be
  67          * added to the compressed buffer and which, if unhandled, would
  68          * confuse the hell out of our decompression function.
  69          */
  70         *(uint32_t *)dest = BE_32(bufsiz);
  71 
  72         return (bufsiz + sizeof (bufsiz));
  73 }
  74 
  75 /*ARGSUSED*/
  76 int
  77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
  78 {
  79         const char *src = s_start;
  80         uint32_t bufsiz = BE_IN32(src);
  81 
  82         /* invalid compressed buffer size encoded at start */
  83         if (bufsiz + sizeof (bufsiz) > s_len)
  84                 return (1);
  85 
  86         /*
  87          * Returns 0 on success (decompression function returned non-negative)
  88          * and non-zero on failure (decompression function returned negative.
  89          */
  90         return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
  91             d_start, bufsiz, d_len) < 0);
  92 }
  93 
  94 /*
  95  * LZ4 API Description:
  96  *
  97  * Simple Functions:
  98  * real_LZ4_compress() :
  99  *      isize  : is the input size. Max supported value is ~1.9GB
 100  *      return : the number of bytes written in buffer dest
 101  *               or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *      note : destination buffer must be already allocated.
 *              destination buffer must be sized to handle worst case
 *              situations (input data not compressible); worst case size
 *              evaluation is provided by function LZ4_compressBound().
 106  *
 107  * real_LZ4_uncompress() :
 108  *      osize  : is the output size, therefore the original size
 109  *      return : the number of bytes read in the source buffer.
 110  *              If the source stream is malformed, the function will stop
 111  *              decoding and return a negative result, indicating the byte
 112  *              position of the faulty instruction. This function never
 113  *              writes beyond dest + osize, and is therefore protected
 114  *              against malicious data packets.
 115  *      note : destination buffer must be already allocated
 116  *
 117  * Advanced Functions
 118  *
 119  * LZ4_compressBound() :
 120  *      Provides the maximum size that LZ4 may output in a "worst case"
 121  *      scenario (input data not compressible) primarily useful for memory
 122  *      allocation of output buffer.
 123  *
 124  *      isize  : is the input size. Max supported value is ~1.9GB
 125  *      return : maximum output size in a "worst case" scenario
 126  *      note : this function is limited by "int" range (2^31-1)
 127  *
 128  * LZ4_uncompress_unknownOutputSize() :
 129  *      isize  : is the input size, therefore the compressed size
 130  *      maxOutputSize : is the size of the destination buffer (which must be
 131  *              already allocated)
 132  *      return : the number of bytes decoded in the destination buffer
 133  *              (necessarily <= maxOutputSize). If the source stream is
 134  *              malformed, the function will stop decoding and return a
 135  *              negative result, indicating the byte position of the faulty
 136  *              instruction. This function never writes beyond dest +
 137  *              maxOutputSize, and is therefore protected against malicious
 138  *              data packets.
 139  *      note   : Destination buffer must be already allocated.
 140  *              This version is slightly slower than real_LZ4_uncompress()
 141  *
 142  * LZ4_compressCtx() :
 143  *      This function explicitly handles the CTX memory structure.
 144  *
 145  *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 146  *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 147  *      isn't valid.
 148  *
 149  * LZ4_compress64kCtx() :
 150  *      Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 151  *      isize *Must* be <64KB, otherwise the output will be corrupted.
 152  *
 153  *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 154  *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 155  *      isn't valid.
 156  */
 157 
 158 /*
 159  * Tuning parameters
 160  */
 161 
 162 /*
 163  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 164  *       Lowering this value reduces memory usage. Reduced memory usage
 165  *      typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 166  *      L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 167  *      (examples : 12 -> 16KB ; 17 -> 512KB)
 168  */
 169 #define COMPRESSIONLEVEL 12
 170 
 171 /*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *      algorithm skip faster over data segments considered "incompressible".
 174  *      This may decrease compression ratio dramatically, but will be
 175  *      faster on incompressible data. Increasing this value will make
 176  *      the algorithm search more before declaring a segment "incompressible".
 177  *      This could improve compression a bit, but will be slower on
 178  *      incompressible data. The default value (6) is recommended.
 179  */
 180 #define NOTCOMPRESSIBLE_CONFIRMATION 6
 181 
 182 /*
 183  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 184  * performance for big endian cpu, but the resulting compressed stream
 185  * will be incompatible with little-endian CPU. You can set this option
 186  * to 1 in situations where data will stay within closed environment.
 187  * This option is useless on Little_Endian CPU (such as x86).
 188  */
 189 /* #define      BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
 190 
 191 /*
 192  * CPU Feature Detection
 193  */
 194 
 195 /* 32 or 64 bits ? */
 196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
 197     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
 198     defined(__LP64__) || defined(_LP64))
 199 #define LZ4_ARCH64 1
 200 #else
 201 #define LZ4_ARCH64 0
 202 #endif
 203 
 204 /*
 205  * Limits the amount of stack space that the algorithm may consume to hold
 206  * the compression lookup table. The value `9' here means we'll never use
 207  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 208  * If more memory is needed, it is allocated from the heap.
 209  */
 210 #define STACKLIMIT 9
 211 
 212 /*
 213  * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 215  */
 216 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
 217         defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
 218         defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
 219         defined(__powerpc) || defined(powerpc) || \
 220         ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
 221 #define LZ4_BIG_ENDIAN 1
 222 #else
 223 /*
 224  * Little Endian assumed. PDP Endian and other very rare endian format
 225  * are unsupported.
 226  */
 227 #endif
 228 
 229 /*
 230  * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 232  * insert extra code to ensure aligned access is respected. If you know
 233  * your target CPU supports unaligned memory access, you may want to
 234  * force this option manually to improve performance
 235  */
 236 #if defined(__ARM_FEATURE_UNALIGNED)
 237 #define LZ4_FORCE_UNALIGNED_ACCESS 1
 238 #endif
 239 
 240 #ifdef __sparc
 241 #define LZ4_FORCE_SW_BITCOUNT
 242 #endif
 243 
 244 /*
 245  * Compiler Options
 246  */
 247 #if __STDC_VERSION__ >= 199901L      /* C99 */
 248 /* "restrict" is a known keyword */
 249 #else
 250 /* Disable restrict */
 251 #define restrict
 252 #endif
 253 
 254 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 255 
 256 #ifdef _MSC_VER
 257 /* Visual Studio */
 258 /* Visual is not C99, but supports some kind of inline */
 259 #define inline __forceinline
 260 #if LZ4_ARCH64
 261 /* For Visual 2005 */
 262 #pragma intrinsic(_BitScanForward64)
 263 #pragma intrinsic(_BitScanReverse64)
 264 #else /* !LZ4_ARCH64 */
 265 /* For Visual 2005 */
 266 #pragma intrinsic(_BitScanForward)
 267 #pragma intrinsic(_BitScanReverse)
 268 #endif /* !LZ4_ARCH64 */
 269 #endif /* _MSC_VER */
 270 
 271 #ifdef _MSC_VER
 272 #define lz4_bswap16(x) _byteswap_ushort(x)
 273 #else /* !_MSC_VER */
 274 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
 275         (((x) & 0xffu) << 8)))
 276 #endif /* !_MSC_VER */
 277 
 278 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
 279 #define expect(expr, value)    (__builtin_expect((expr), (value)))
 280 #else
 281 #define expect(expr, value)    (expr)
 282 #endif
 283 
 284 #define likely(expr)    expect((expr) != 0, 1)
 285 #define unlikely(expr)  expect((expr) != 0, 0)
 286 
 287 /* Basic types */
 288 #if defined(_MSC_VER)
 289 /* Visual Studio does not support 'stdint' natively */
 290 #define BYTE    unsigned __int8
 291 #define U16     unsigned __int16
 292 #define U32     unsigned __int32
 293 #define S32     __int32
 294 #define U64     unsigned __int64
 295 #else /* !defined(_MSC_VER) */
 296 #define BYTE    uint8_t
 297 #define U16     uint16_t
 298 #define U32     uint32_t
 299 #define S32     int32_t
 300 #define U64     uint64_t
 301 #endif /* !defined(_MSC_VER) */
 302 
 303 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 304 #pragma pack(1)
 305 #endif
 306 
 307 typedef struct _U16_S {
 308         U16 v;
 309 } U16_S;
 310 typedef struct _U32_S {
 311         U32 v;
 312 } U32_S;
 313 typedef struct _U64_S {
 314         U64 v;
 315 } U64_S;
 316 
 317 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 318 #pragma pack()
 319 #endif
 320 
 321 #define A64(x) (((U64_S *)(x))->v)
 322 #define A32(x) (((U32_S *)(x))->v)
 323 #define A16(x) (((U16_S *)(x))->v)
 324 
 325 /*
 326  * Constants
 327  */
 328 #define MINMATCH 4
 329 
 330 #define HASH_LOG COMPRESSIONLEVEL
 331 #define HASHTABLESIZE (1 << HASH_LOG)
 332 #define HASH_MASK (HASHTABLESIZE - 1)
 333 
 334 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
 335         NOTCOMPRESSIBLE_CONFIRMATION : 2)
 336 
 337 /*
 338  * Defines if memory is allocated into the stack (local variable),
 339  * or into the heap (kmem_alloc()).
 340  */
 341 #define HEAPMODE (HASH_LOG > STACKLIMIT)
 342 #define COPYLENGTH 8
 343 #define LASTLITERALS 5
 344 #define MFLIMIT (COPYLENGTH + MINMATCH)
 345 #define MINLENGTH (MFLIMIT + 1)
 346 
 347 #define MAXD_LOG 16
 348 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
 349 
 350 #define ML_BITS 4
 351 #define ML_MASK ((1U<<ML_BITS)-1)
 352 #define RUN_BITS (8-ML_BITS)
 353 #define RUN_MASK ((1U<<RUN_BITS)-1)
 354 
 355 
 356 /*
 357  * Architecture-specific macros
 358  */
 359 #if LZ4_ARCH64
 360 #define STEPSIZE 8
 361 #define UARCH U64
 362 #define AARCH A64
 363 #define LZ4_COPYSTEP(s, d)      A64(d) = A64(s); d += 8; s += 8;
 364 #define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d)
 365 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
 366 #define HTYPE U32
 367 #define INITBASE(base)          const BYTE* const base = ip
 368 #else /* !LZ4_ARCH64 */
 369 #define STEPSIZE 4
 370 #define UARCH U32
 371 #define AARCH A32
 372 #define LZ4_COPYSTEP(s, d)      A32(d) = A32(s); d += 4; s += 4;
 373 #define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
 374 #define LZ4_SECURECOPY          LZ4_WILDCOPY
 375 #define HTYPE const BYTE *
 376 #define INITBASE(base)          const int base = 0
 377 #endif /* !LZ4_ARCH64 */
 378 
 379 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
 380 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
 381         { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
 382 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
 383         { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
 384 #else
 385 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
 386 #define LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
 387 #endif
 388 
 389 
 390 /* Local structures */
 391 struct refTables {
 392         HTYPE hashTable[HASHTABLESIZE];
 393 };
 394 
 395 
 396 /* Macros */
 397 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
 398         HASH_LOG))
 399 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
 400 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
 401 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
 402         d = e; }
 403 
 404 
 405 /* Private functions */
 406 #if LZ4_ARCH64
 407 
/*
 * Given a nonzero XOR difference between two 64-bit words, return how
 * many bytes are identical before the first mismatching byte: the number
 * of leading zero bytes of val on big-endian CPUs, or of trailing zero
 * bytes on little-endian CPUs.  Used by the match-length loop to extend
 * a match by the exact count of equal bytes within the final word.
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Hardware scan for the highest set bit; >> 3 converts bits to bytes */
        unsigned long r = 0;
        _BitScanReverse64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Count leading zero bits, then convert to whole bytes */
        return (__builtin_clzll(val) >> 3);
#else
        /* Software fallback: narrow down the highest nonzero byte stepwise */
        int r;
        if (!(val >> 32)) {
                r = 4;
        } else {
                r = 0;
                val >>= 32;
        }
        if (!(val >> 16)) {
                r += 2;
                val >>= 8;
        } else {
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Hardware scan for the lowest set bit; >> 3 converts bits to bytes */
        unsigned long r = 0;
        _BitScanForward64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Count trailing zero bits, then convert to whole bytes */
        return (__builtin_ctzll(val) >> 3);
#else
        /*
         * Software fallback: (val & -val) isolates the lowest set bit;
         * a de Bruijn multiply-and-shift then maps that bit position to
         * its byte index via table lookup.
         */
        static const int DeBruijnBytePos[64] =
            { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
                3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
                5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
                4, 5, 7, 2, 6, 5, 7, 6, 7, 7
        };
        return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
            58];
#endif
#endif
}
 456 
 457 #else
 458 
/*
 * 32-bit variant of LZ4_NbCommonBytes(): given a nonzero XOR difference
 * between two 32-bit words, return the number of identical bytes before
 * the first mismatch (leading zero bytes on big-endian CPUs, trailing
 * zero bytes on little-endian CPUs).
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Hardware scan for the highest set bit; >> 3 converts bits to bytes */
        unsigned long r = 0;
        _BitScanReverse(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Count leading zero bits, then convert to whole bytes */
        return (__builtin_clz(val) >> 3);
#else
        /* Software fallback: narrow down the highest nonzero byte stepwise */
        int r;
        if (!(val >> 16)) {
                r = 2;
                val >>= 8;
        } else {
                r = 0;
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Hardware scan for the lowest set bit; >> 3 converts bits to bytes */
        unsigned long r = 0;
        _BitScanForward(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        /* Count trailing zero bits, then convert to whole bytes */
        return (__builtin_ctz(val) >> 3);
#else
        /*
         * Software fallback: (val & -val) isolates the lowest set bit;
         * a de Bruijn multiply-and-shift then maps that bit position to
         * its byte index via table lookup.
         */
        static const int DeBruijnBytePos[32] = {
                0, 0, 3, 0, 3, 1, 3, 0,
                3, 2, 2, 1, 3, 2, 0, 1,
                3, 3, 1, 2, 2, 2, 2, 0,
                3, 1, 2, 0, 1, 0, 1, 1
        };
        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
            27];
#endif
#endif
}
 502 
 503 #endif
 504 
 505 /* Public functions */
 506 
 507 static int
 508 LZ4_compressBound(int isize)
 509 {
 510         return (isize + (isize / 255) + 16);
 511 }
 512 
 513 /* Compression functions */
 514 
/*
 * Core LZ4 block compressor.  Reads isize bytes from source and writes an
 * LZ4-format stream of token/literal/offset/match-length sequences to dest
 * (at most osize bytes).  Returns the number of bytes written, or 0 if the
 * output would not fit in osize.  ctx supplies the hash table when
 * HEAPMODE is set (ILLUMOS: ctx must be caller-allocated; NULL is invalid).
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
        struct refTables *srt = (struct refTables *)ctx;
        HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
        HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

        const BYTE *ip = (BYTE *) source;
        INITBASE(base);
        const BYTE *anchor = ip;        /* start of the pending literal run */
        const BYTE *const iend = ip + isize;
        const BYTE *const oend = (BYTE *) dest + osize;
        /* no match sequence may begin within MFLIMIT bytes of the end */
        const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

        BYTE *op = (BYTE *) dest;

        int len, length;
        const int skipStrength = SKIPSTRENGTH;
        U32 forwardH;


        /* Init: inputs too small to hold any match are emitted as literals */
        if (isize < MINLENGTH)
                goto _last_literals;

        /* First Byte: seed the hash table, then start searching at ip+1 */
        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
        ip++;
        forwardH = LZ4_HASH_VALUE(ip);

        /* Main Loop: one iteration per literal-run + match sequence */
        for (;;) {
                int findMatchAttempts = (1U << skipStrength) + 3;
                const BYTE *forwardIp = ip;
                const BYTE *ref;
                BYTE *token;

                /*
                 * Find a match.  The step grows as attempts accumulate, so
                 * incompressible regions are skipped over progressively
                 * faster (see NOTCOMPRESSIBLE_CONFIRMATION).
                 */
                do {
                        U32 h = forwardH;
                        int step = findMatchAttempts++ >> skipStrength;
                        ip = forwardIp;
                        forwardIp = ip + step;

                        if unlikely(forwardIp > mflimit) {
                                goto _last_literals;
                        }

                        forwardH = LZ4_HASH_VALUE(forwardIp);
                        ref = base + HashTable[h];
                        HashTable[h] = ip - base;

                        /* accept only in-range candidates with 4 equal bytes */
                } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

                /* Catch up: extend the match backwards over equal bytes */
                while ((ip > anchor) && (ref > (BYTE *) source) &&
                    unlikely(ip[-1] == ref[-1])) {
                        ip--;
                        ref--;
                }

                /* Encode Literal length */
                length = ip - anchor;
                token = op++;

                /* Check output limit (token + offset + worst-case lengths) */
                if unlikely(op + length + (2 + 1 + LASTLITERALS) +
                    (length >> 8) > oend)
                        return (0);

                if (length >= (int)RUN_MASK) {
                        /* long run: saturate the token, then 255-byte steps */
                        *token = (RUN_MASK << ML_BITS);
                        len = length - RUN_MASK;
                        for (; len > 254; len -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)len;
                } else
                        *token = (length << ML_BITS);

                /* Copy Literals */
                LZ4_BLINDCOPY(anchor, op, length);

                _next_match:
                /* Encode Offset (little-endian, 2 bytes) */
                LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

                /* Start Counting match length (MINMATCH already verified) */
                ip += MINMATCH;
                ref += MINMATCH;        /* MinMatch verified */
                anchor = ip;
                /* compare a machine word at a time while room remains */
                while likely(ip < matchlimit - (STEPSIZE - 1)) {
                        UARCH diff = AARCH(ref) ^ AARCH(ip);
                        if (!diff) {
                                ip += STEPSIZE;
                                ref += STEPSIZE;
                                continue;
                        }
                        ip += LZ4_NbCommonBytes(diff);
                        goto _endCount;
                }
                /* tail comparison: 4-, 2-, then 1-byte granularity */
#if LZ4_ARCH64
                if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
                        ip += 4;
                        ref += 4;
                }
#endif
                if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
                        ip += 2;
                        ref += 2;
                }
                if ((ip < matchlimit) && (*ref == *ip))
                        ip++;
                _endCount:

                /* Encode MatchLength */
                len = (ip - anchor);
                /* Check output limit */
                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
                        return (0);
                if (len >= (int)ML_MASK) {
                        /* long match: saturate token, emit 255-byte steps */
                        *token += ML_MASK;
                        len -= ML_MASK;
                        for (; len > 509; len -= 510) {
                                *op++ = 255;
                                *op++ = 255;
                        }
                        if (len > 254) {
                                len -= 255;
                                *op++ = 255;
                        }
                        *op++ = (BYTE)len;
                } else
                        *token += len;

                /* Test end of chunk */
                if (ip > mflimit) {
                        anchor = ip;
                        break;
                }
                /* Fill table so the position just behind us stays findable */
                HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

                /* Test next position: a match here continues with no literals */
                ref = base + HashTable[LZ4_HASH_VALUE(ip)];
                HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
                if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
                /* Prepare next loop */
                anchor = ip++;
                forwardH = LZ4_HASH_VALUE(ip);
        }

        _last_literals:
        /* Encode Last Literals: everything from anchor to iend, verbatim */
        {
                int lastRun = iend - anchor;
                /* room for the run length encoding plus the bytes themselves */
                if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
                    oend)
                        return (0);
                if (lastRun >= (int)RUN_MASK) {
                        *op++ = (RUN_MASK << ML_BITS);
                        lastRun -= RUN_MASK;
                        for (; lastRun > 254; lastRun -= 255) {
                                *op++ = 255;
                        }
                        *op++ = (BYTE)lastRun;
                } else
                        *op++ = (lastRun << ML_BITS);
                (void) memcpy(op, anchor, iend - anchor);
                op += iend - anchor;
        }

        /* End: total bytes written to dest */
        return (int)(((char *)op) - dest);
}
 699 
 700 
 701 
 702 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
 703 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
 704 #define HASHLOG64K (HASH_LOG + 1)
 705 #define HASH64KTABLESIZE (1U << HASHLOG64K)
 706 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
 707         HASHLOG64K))
 708 #define LZ4_HASH64K_VALUE(p)    LZ4_HASH64K_FUNCTION(A32(p))
 709 
 710 /*ARGSUSED*/
 711 static int
 712 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
 713     int osize)
 714 {
 715 #if HEAPMODE
 716         struct refTables *srt = (struct refTables *)ctx;
 717         U16 *HashTable = (U16 *) (srt->hashTable);
 718 #else
 719         U16 HashTable[HASH64KTABLESIZE] = { 0 };
 720 #endif
 721 
 722         const BYTE *ip = (BYTE *) source;
 723         const BYTE *anchor = ip;
 724         const BYTE *const base = ip;
 725         const BYTE *const iend = ip + isize;
 726         const BYTE *const oend = (BYTE *) dest + osize;
 727         const BYTE *const mflimit = iend - MFLIMIT;
 728 #define matchlimit (iend - LASTLITERALS)
 729 
 730         BYTE *op = (BYTE *) dest;
 731 
 732         int len, length;
 733         const int skipStrength = SKIPSTRENGTH;
 734         U32 forwardH;
 735 
 736         /* Init */
 737         if (isize < MINLENGTH)
 738                 goto _last_literals;
 739 
 740         /* First Byte */
 741         ip++;
 742         forwardH = LZ4_HASH64K_VALUE(ip);
 743 
 744         /* Main Loop */
 745         for (;;) {
 746                 int findMatchAttempts = (1U << skipStrength) + 3;
 747                 const BYTE *forwardIp = ip;
 748                 const BYTE *ref;
 749                 BYTE *token;
 750 
 751                 /* Find a match */
 752                 do {
 753                         U32 h = forwardH;
 754                         int step = findMatchAttempts++ >> skipStrength;
 755                         ip = forwardIp;
 756                         forwardIp = ip + step;
 757 
 758                         if (forwardIp > mflimit) {
 759                                 goto _last_literals;
 760                         }
 761 
 762                         forwardH = LZ4_HASH64K_VALUE(forwardIp);
 763                         ref = base + HashTable[h];
 764                         HashTable[h] = ip - base;
 765 
 766                 } while (A32(ref) != A32(ip));
 767 
 768                 /* Catch up */
 769                 while ((ip > anchor) && (ref > (BYTE *) source) &&
 770                     (ip[-1] == ref[-1])) {
 771                         ip--;
 772                         ref--;
 773                 }
 774 
 775                 /* Encode Literal length */
 776                 length = ip - anchor;
 777                 token = op++;
 778 
 779                 /* Check output limit */
 780                 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
 781                     (length >> 8) > oend)
 782                         return (0);
 783 
 784                 if (length >= (int)RUN_MASK) {
 785                         *token = (RUN_MASK << ML_BITS);
 786                         len = length - RUN_MASK;
 787                         for (; len > 254; len -= 255)
 788                                 *op++ = 255;
 789                         *op++ = (BYTE)len;
 790                 } else
 791                         *token = (length << ML_BITS);
 792 
 793                 /* Copy Literals */
 794                 LZ4_BLINDCOPY(anchor, op, length);
 795 
 796                 _next_match:
 797                 /* Encode Offset */
 798                 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
 799 
 800                 /* Start Counting */
 801                 ip += MINMATCH;
 802                 ref += MINMATCH;        /* MinMatch verified */
 803                 anchor = ip;
 804                 while (ip < matchlimit - (STEPSIZE - 1)) {
 805                         UARCH diff = AARCH(ref) ^ AARCH(ip);
 806                         if (!diff) {
 807                                 ip += STEPSIZE;
 808                                 ref += STEPSIZE;
 809                                 continue;
 810                         }
 811                         ip += LZ4_NbCommonBytes(diff);
 812                         goto _endCount;
 813                 }
 814 #if LZ4_ARCH64
 815                 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
 816                         ip += 4;
 817                         ref += 4;
 818                 }
 819 #endif
 820                 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
 821                         ip += 2;
 822                         ref += 2;
 823                 }
 824                 if ((ip < matchlimit) && (*ref == *ip))
 825                         ip++;
 826                 _endCount:
 827 
 828                 /* Encode MatchLength */
 829                 len = (ip - anchor);
 830                 /* Check output limit */
 831                 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
 832                         return (0);
 833                 if (len >= (int)ML_MASK) {
 834                         *token += ML_MASK;
 835                         len -= ML_MASK;
 836                         for (; len > 509; len -= 510) {
 837                                 *op++ = 255;
 838                                 *op++ = 255;
 839                         }
 840                         if (len > 254) {
 841                                 len -= 255;
 842                                 *op++ = 255;
 843                         }
 844                         *op++ = (BYTE)len;
 845                 } else
 846                         *token += len;
 847 
 848                 /* Test end of chunk */
 849                 if (ip > mflimit) {
 850                         anchor = ip;
 851                         break;
 852                 }
 853                 /* Fill table */
 854                 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
 855 
 856                 /* Test next position */
 857                 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
 858                 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
 859                 if (A32(ref) == A32(ip)) {
 860                         token = op++;
 861                         *token = 0;
 862                         goto _next_match;
 863                 }
 864                 /* Prepare next loop */
 865                 anchor = ip++;
 866                 forwardH = LZ4_HASH64K_VALUE(ip);
 867         }
 868 
 869         _last_literals:
 870         /* Encode Last Literals */
 871         {
 872                 int lastRun = iend - anchor;
 873                 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
 874                     oend)
 875                         return (0);
 876                 if (lastRun >= (int)RUN_MASK) {
 877                         *op++ = (RUN_MASK << ML_BITS);
 878                         lastRun -= RUN_MASK;
 879                         for (; lastRun > 254; lastRun -= 255)
 880                                 *op++ = 255;
 881                         *op++ = (BYTE)lastRun;
 882                 } else
 883                         *op++ = (lastRun << ML_BITS);
 884                 (void) memcpy(op, anchor, iend - anchor);
 885                 op += iend - anchor;
 886         }
 887 
 888         /* End */
 889         return (int)(((char *)op) - dest);
 890 }
 891 
 892 static int
 893 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
 894 {
 895 #if HEAPMODE
 896         void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
 897         int result;
 898 
 899         /*
 900          * out of kernel memory, gently fall through - this will disable
 901          * compression in zio_compress_data
 902          */
 903         if (ctx == NULL)
 904                 return (0);
 905 
 906         if (isize < LZ4_64KLIMIT)
 907                 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
 908         else
 909                 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
 910 
 911         kmem_free(ctx, sizeof (struct refTables));
 912         return (result);
 913 #else
 914         if (isize < (int)LZ4_64KLIMIT)
 915                 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
 916         return (LZ4_compressCtx(NULL, source, dest, isize, osize));
 917 #endif
 918 }
 919 
 920 /* Decompression functions */
 921 
 922 /*
 923  * Note: The decoding functions real_LZ4_uncompress() and
 924  *      LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 925  *      attack type. They will never write nor read outside of the provided
 926  *      output buffers. LZ4_uncompress_unknownOutputSize() also insures that
 927  *      it will never read outside of the input buffer. A corrupted input
 928  *      will produce an error result, a negative int, indicating the position
 929  *      of the error within input stream.
 930  */
 931 
/*
 * Decompress an LZ4-encoded buffer whose *decompressed* size (osize) is
 * known up front.
 *
 *	source:	compressed input stream
 *	dest:	output buffer of exactly osize bytes
 *	osize:	expected size of the decompressed data
 *
 * Returns the number of input bytes consumed on success, or a negative
 * value (the negated position of the error within the input stream) on
 * failure.  All writes are bounded by dest + osize.  Note that this
 * routine keeps no end-of-input pointer, so reads of the compressed
 * stream are NOT bounds-checked; the input must be intact.
 */
static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	/*
	 * Correction tables for copying overlapping matches whose offset is
	 * smaller than STEPSIZE (see the "op - ref < STEPSIZE" branch below).
	 */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		/*
		 * Each sequence starts with a token: the upper bits hold the
		 * literal run length, the lower bits the match length.  A
		 * field equal to RUN_MASK/ML_MASK means additional length
		 * bytes follow, each contributing up to 255.
		 */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			/*
			 * Too close to the end of the output for an
			 * overshooting wild copy: this must be the final
			 * literal run, ending exactly at oend.
			 */
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
			}
		/* wild copy may advance op past cpy; rewind ip by the overshoot */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset create reference outside destination
			 * buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match (offset < STEPSIZE): copy the
			 * first 4 bytes one at a time, then realign ref via
			 * the dec32/dec64 tables so that word-sized copies
			 * reproduce the repeating pattern correctly.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			/* near the end of the output buffer: finish carefully */
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}
1043 
/*
 * Decompress an LZ4-encoded buffer when only the *compressed* size (isize)
 * is known; the decompressed result may be anything up to maxOutputSize
 * bytes.
 *
 *	source:		compressed input stream of isize bytes
 *	dest:		output buffer
 *	isize:		exact size of the compressed input
 *	maxOutputSize:	capacity of dest
 *
 * Returns the number of bytes written to dest on success, or a negative
 * value (the negated position of the error within the input stream) on
 * corrupt input.  Unlike real_LZ4_uncompress(), reads are bounded by iend
 * as well as writes by oend.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/*
	 * Correction tables for copying overlapping matches whose offset is
	 * smaller than STEPSIZE (see the "op - ref < STEPSIZE" branch below).
	 */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		/*
		 * Token layout: upper bits = literal run length, lower bits =
		 * match length; a saturated field means extension bytes
		 * follow, each adding up to 255.
		 */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			/* extension-byte loop is bounded by iend */
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			/*
			 * Too close to either buffer end for a wild copy;
			 * this must be the final literal run, consuming the
			 * input exactly.
			 */
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		/* wild copy may advance op past cpy; rewind ip by the overshoot */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			/* extension-byte loop is bounded by iend */
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match (offset < STEPSIZE): copy the
			 * first 4 bytes one at a time, then realign ref via
			 * the dec32/dec64 tables so that word-sized copies
			 * reproduce the repeating pattern correctly.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			/* near the end of the output buffer: finish carefully */
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}