1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  14  */
  15 
  16 /*
  17  * Support functions for smb2_ioctl/fsctl codes:
  18  * FSCTL_SRV_OFFLOAD_READ
  19  * FSCTL_SRV_OFFLOAD_WRITE
  20  * (and related)
  21  */
  22 
  23 #include <smbsrv/smb2_kproto.h>
  24 #include <smbsrv/smb_fsops.h>
  25 #include <smb/winioctl.h>
  26 
  27 /*
  28  * Summary of how offload data transfer works:
  29  *
  30  * The client drives a server-side copy.  Outline:
  31  * 1: open src_file
  32  * 2: create dst_file and set its size
  33  * 3: while src_file not all copied {
  34  *        offload_read(src_file, &token);
  35  *        while token not all copied {
  36  *            offload_write(dst_file, token);
  37  *        }
  38  *    }
  39  *
  40  * Each "offload read" request returns a "token" representing some
  41  * portion of the source file.  The server decides what kind of
  42  * token to use, and how much of the source file it should cover.
  43  * The length represented may be less than the client requested.
  44  * No data are copied during offload_read (just meta-data).
  45  *
  46  * Each "offload write" request copies some portion of the data
  47  * represented by the "token" into the output file.  The amount
  48  * of data copied may be less than the client requested, and the
  49  * client keeps sending offload write requests until they have
  50  * copied all the data represented by the current token.
  51  */
  52 
  53 /* [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE */
  54 #define OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND       1
  55 
  56 /*
  57  * [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN
  58  * Note reserved: 0xFFFF0002 – 0xFFFFFFFF
  59  *
  60  * ...TOKEN_TYPE_ZERO_DATA:  A well-known Token that indicates ...
  61  * (offload write should just zero to the destination)
  62  * The payload (tok_other) is ignored with this type.
  63  */
  64 #define STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA    0xFFFF0001
  65 
  66 /* Our vendor-specific token type: struct tok_native1 */
  67 #define STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1      0x10001
  68 
  69 #define TOKEN_TOTAL_SIZE        512
  70 #define TOKEN_MAX_PAYLOAD       504     /* 512 - 8 */
  71 
  72 /* This mask is for sanity checking offsets etc. */
  73 #define OFFMASK         ((uint64_t)DEV_BSIZE-1)
  74 
  75 typedef struct smb_odx_token {
  76         uint32_t        tok_type;       /* big-endian on the wire */
  77         uint16_t        tok_reserved;   /* zero */
  78         uint16_t        tok_len;        /* big-endian on the wire */
  79         union {
  80                 uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
  81                 struct tok_native1 {
  82                         smb2fid_t       tn1_fid;
  83                         uint64_t        tn1_off;
  84                         uint64_t        tn1_eof;
  85                 } u_tok_native1;
  86         } tok_u;
  87 } smb_odx_token_t;
  88 
  89 typedef struct odx_write_args {
  90         uint32_t in_struct_size;
  91         uint32_t in_flags;
  92         uint64_t in_dstoff;
  93         uint64_t in_xlen;
  94         uint64_t in_xoff;
  95         uint32_t out_struct_size;
  96         uint32_t out_flags;
  97         uint64_t out_xlen;
  98 } odx_write_args_t;
  99 
 100 static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
 101 static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
 102 static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
 103 static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);
 104 
 105 static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
 106 static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
 107     odx_write_args_t *, smb_odx_token_t *);
 108 
 109 
 110 /* We can disable this feature for testing etc. */
 111 int smb2_odx_enable = 1;
 112 
 113 /*
 114  * These two variables determine the intervals of offload_read and
 115  * offload_write calls (respectively) during an offload copy.
 116  *
 117  * For the offload read token we could offer a token representing
 118  * the whole file, but we'll have the client come back for a new
 119  * "token" after each 256M so we have a chance to look for "holes".
 120  * This lets us use the special "zero" token while we're in any
 121  * un-allocated parts of the file, so offload_write can use the
 122  * (more efficient) smb_fsop_freesp instead of copying.
 123  *
 124  * We limit the size of offload_write to 16M per request so we
 125  * don't end up taking so long with I/O that the client might
 126  * time out the request.  Keep: write_max <= read_max
 127  */
 128 uint32_t smb2_odx_read_max = (1<<28); /* 256M */
 129 uint32_t smb2_odx_write_max = (1<<24); /* 16M */
 130 
 131 /*
 132  * This buffer size determines the I/O size for the copy during
 133  * offoad write, where it will read/write using this buffer.
 134  * Note: We kmem_alloc this, so don't make it HUGE.  It only
 135  * needs to be large enough to allow the copy to proceed with
 136  * reasonable efficiency.  1M is currently the largest possible
 137  * block size with ZFS, so that's what we'll use here.
 138  */
 139 uint32_t smb2_odx_buf_size = (1<<20); /* 1M */
 140 
 141 
 142 /*
 143  * FSCTL_OFFLOAD_READ
 144  * [MS-FSCC] 2.3.77
 145  *
 146  * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
 147  *
 148  * The returned data is an (opaque to the client) 512-byte "token"
 149  * that represents the specified range (offset, length) of the
 150  * source file.  The "token" we return here comes back to us in an
 151  * FSCTL_OFFLOAD_WRITE.  We must stash whatever we'll need then in
 152  * the token we return here.
 153  *
 154  * We want server-side copy to be able to copy "holes" efficiently,
 155  * but would rather avoid the complexity of encoding a list of all
 156  * allocated ranges into our returned token, so this compromise:
 157  *
 158  * When the current range is entirely within a "hole", we'll return
 159  * the special "zeros" token, and the offload write using that token
 160  * will use the simple and very efficient smb_fsop_freesp.  In this
 161  * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
 162  *
 163  * When there's any data in the range to copy, we'll return our
 164  * "native" token, and the subsequent offload_write will walk the
 165  * allocated ranges copying and/or zeroing as needed.  In this
 166  * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
 167  *
 168  * One additional optimization allowed by the protocol is that when
 169  * we discover that there's no more data after the current range,
 170  * we can set the flag ..._ALL_ZERO_BEYOND which tells the client
 171  * they can stop copying here if they like.
 172  */
 173 uint32_t
 174 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
 175 {
 176         smb_attr_t src_attr;
 177         smb_odx_token_t *tok = NULL;
 178         struct tok_native1 *tn1;
 179         smb_ofile_t *ofile = sr->fid_ofile;
 180         uint64_t src_size, src_rnd_size;
 181         off64_t data, hole;
 182         uint32_t in_struct_size;
 183         uint32_t in_flags;
 184         uint32_t in_ttl;
 185         uint64_t in_file_off;
 186         uint64_t in_copy_len;
 187         uint64_t out_xlen;
 188         uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
 189         uint32_t out_flags = 0;
 190         uint32_t status;
 191         uint32_t tok_type;
 192         int rc;
 193 
 194         if (smb2_odx_enable == 0)
 195                 return (NT_STATUS_NOT_SUPPORTED);
 196 
 197         /*
 198          * Make sure the (src) ofile granted access allows read.
 199          * [MS-FSA] didn't mention this, so it's not clear where
 200          * this should happen relative to other checks.  Usually
 201          * access checks happen early.
 202          */
 203         status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
 204         if (status != NT_STATUS_SUCCESS)
 205                 return (status);
 206 
 207         /*
 208          * Decode FSCTL_OFFLOAD_READ_INPUT struct,
 209          * and do in/out size checks.
 210          */
 211         rc = smb_mbc_decodef(
 212             fsctl->in_mbc, "lll4.qq",
 213             &in_struct_size,        /* l */
 214             &in_flags,              /* l */
 215             &in_ttl,                /* l */
 216             /* reserved         4. */
 217             &in_file_off,   /* q */
 218             &in_copy_len);  /* q */
 219         if (rc != 0)
 220                 return (NT_STATUS_BUFFER_TOO_SMALL);
 221         if (fsctl->MaxOutputResp < out_struct_size)
 222                 return (NT_STATUS_BUFFER_TOO_SMALL);
 223 
 224         /*
 225          * More arg checking per MS-FSA
 226          */
 227         if ((in_file_off & OFFMASK) != 0 ||
 228             (in_copy_len & OFFMASK) != 0)
 229                 return (NT_STATUS_INVALID_PARAMETER);
 230         if (in_struct_size != 32)
 231                 return (NT_STATUS_INVALID_PARAMETER);
 232         if (in_file_off > INT64_MAX ||
 233             (in_file_off + in_copy_len) < in_file_off)
 234                 return (NT_STATUS_INVALID_PARAMETER);
 235 
 236         /*
 237          * [MS-FSA] (summarizing)
 238          * If not data stream, or if sparse, encrypted, compressed...
 239          * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
 240          *
 241          * We'll ignore most of those except to require:
 242          * Plain file, not a stream.
 243          */
 244         if (!smb_node_is_file(ofile->f_node))
 245                 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
 246         if (SMB_IS_STREAM(ofile->f_node))
 247                 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
 248 
 249         /*
 250          * [MS-FSA] If Open.Stream.IsDeleted ...
 251          * We don't really have this.
 252          */
 253 
 254         /*
 255          * If CopyLength == 0, "return immediately success".
 256          */
 257         if (in_copy_len == 0) {
 258                 out_xlen = 0;
 259                 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
 260                 goto done;
 261         }
 262 
 263         /*
 264          * Check for lock conflicting with the read.
 265          */
 266         status = smb_lock_range_access(sr, ofile->f_node,
 267             in_file_off, in_copy_len, B_FALSE);
 268         if (status != 0)
 269                 return (status); /* == FILE_LOCK_CONFLICT */
 270 
 271         /*
 272          * Get the file size (rounded to a full block)
 273          * and check the requested offset.
 274          */
 275         bzero(&src_attr, sizeof (src_attr));
 276         src_attr.sa_mask = SMB_AT_SIZE;
 277         status = smb2_ofile_getattr(sr, ofile, &src_attr);
 278         if (status != NT_STATUS_SUCCESS)
 279                 return (status);
 280         src_size = src_attr.sa_vattr.va_size;
 281         if (in_file_off >= src_size)
 282                 return (NT_STATUS_END_OF_FILE);
 283 
 284         /*
 285          * Limit the transfer length based on (rounded) EOF.
 286          * Clients expect ranges of whole disk blocks.
 287          * If we get a read in this rounded-up range,
 288          * we'll supply zeros.
 289          */
 290         src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
 291         out_xlen = in_copy_len;
 292         if ((in_file_off + out_xlen) > src_rnd_size)
 293                 out_xlen = src_rnd_size - in_file_off;
 294 
 295         /*
 296          * Also, have the client come back for a new token after every
 297          * smb2_odx_read_max bytes, so we'll have opportunities to
 298          * recognize "holes" in the source file.
 299          */
 300         if (out_xlen > smb2_odx_read_max)
 301                 out_xlen = smb2_odx_read_max;
 302 
 303         /*
 304          * Ask the filesystem if there are any allocated regions in
 305          * the requested range, and return either the "zeros" token
 306          * or our "native" token as appropriate (details above).
 307          */
 308         data = in_file_off;
 309         tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
 310         rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
 311             &data, &hole);
 312         switch (rc) {
 313         case 0:
 314                 /* Found some data.  Is it beyond this range? */
 315                 if (data >= (in_file_off + out_xlen))
 316                         tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
 317                 break;
 318         case ENXIO:
 319                 /* No data here or following. */
 320                 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
 321                 out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
 322                 break;
 323         case ENOSYS:    /* FS does not support VOP_IOCTL... */
 324         case ENOTTY:    /* ... or _FIO_SEEK_DATA, _HOLE */
 325                 break;
 326         default:
 327                 cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
 328                 break;
 329         }
 330 
 331 done:
 332         /* Already checked MaxOutputResp */
 333         (void) smb_mbc_encodef(
 334             fsctl->out_mbc, "llq",
 335             out_struct_size,    /* l */
 336             out_flags,          /* l */
 337             out_xlen);          /* q */
 338 
 339         /*
 340          * Build the ODX token to return
 341          */
 342         tok = smb_srm_zalloc(sr, sizeof (*tok));
 343         tok->tok_type = tok_type;
 344         tok->tok_reserved = 0;
 345         if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
 346                 tok->tok_len = sizeof (*tn1);
 347                 tn1 = &tok->tok_u.u_tok_native1;
 348                 tn1->tn1_fid.persistent = ofile->f_persistid;
 349                 tn1->tn1_fid.temporal = ofile->f_fid;
 350                 tn1->tn1_off = in_file_off;
 351                 tn1->tn1_eof = src_size;
 352         }
 353 
 354         rc = smb_odx_put_token(fsctl->out_mbc, tok);
 355         if (rc != 0)
 356                 return (NT_STATUS_BUFFER_TOO_SMALL);
 357 
 358         return (NT_STATUS_SUCCESS);
 359 }
 360 
 361 /*
 362  * FSCTL_SRV_OFFLOAD_WRITE
 363  * [MS-FSCC] 2.3.80
 364  *
 365  * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
 366  *
 367  * Copies from a source file identified by a "token"
 368  * (previously returned by FSCTL_OFFLOAD_READ)
 369  * to the file on which the ioctl is issued.
 370  */
 371 uint32_t
 372 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
 373 {
 374         smb_attr_t dst_attr;
 375         odx_write_args_t args;
 376         smb_odx_token_t *tok = NULL;
 377         smb_ofile_t *ofile = sr->fid_ofile;
 378         uint64_t dst_size;
 379         uint32_t status = NT_STATUS_INVALID_PARAMETER;
 380         int rc;
 381 
 382         bzero(&args, sizeof (args));
 383         args.out_struct_size = 16;
 384 
 385         if (smb2_odx_enable == 0)
 386                 return (NT_STATUS_NOT_SUPPORTED);
 387 
 388         /*
 389          * Make sure the (dst) ofile granted_access allows write.
 390          * [MS-FSA] didn't mention this, so it's not clear where
 391          * this should happen relative to other checks.  Usually
 392          * access checks happen early.
 393          */
 394         status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
 395         if (status != NT_STATUS_SUCCESS)
 396                 return (status);
 397 
 398         /*
 399          * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
 400          * and do in/out size checks.
 401          */
 402         rc = smb_mbc_decodef(
 403             fsctl->in_mbc, "llqqq",
 404             &args.in_struct_size,   /* l */
 405             &args.in_flags,         /* l */
 406             &args.in_dstoff,                /* q */
 407             &args.in_xlen,          /* q */
 408             &args.in_xoff);         /* q */
 409         if (rc != 0)
 410                 return (NT_STATUS_BUFFER_TOO_SMALL);
 411         tok = smb_srm_zalloc(sr, sizeof (*tok));
 412         rc = smb_odx_get_token(fsctl->in_mbc, tok);
 413         if (rc != 0)
 414                 return (NT_STATUS_BUFFER_TOO_SMALL);
 415         if (fsctl->MaxOutputResp < args.out_struct_size)
 416                 return (NT_STATUS_BUFFER_TOO_SMALL);
 417 
 418         /*
 419          * More arg checking per MS-FSA
 420          */
 421         if ((args.in_dstoff & OFFMASK) != 0 ||
 422             (args.in_xoff & OFFMASK) != 0 ||
 423             (args.in_xlen & OFFMASK) != 0)
 424                 return (NT_STATUS_INVALID_PARAMETER);
 425         if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
 426                 return (NT_STATUS_INVALID_PARAMETER);
 427         if (args.in_dstoff > INT64_MAX ||
 428             (args.in_dstoff + args.in_xlen) < args.in_dstoff)
 429                 return (NT_STATUS_INVALID_PARAMETER);
 430 
 431         /*
 432          * If CopyLength == 0, "return immediately success".
 433          */
 434         if (args.in_xlen == 0) {
 435                 status = 0;
 436                 goto done;
 437         }
 438 
 439         /*
 440          * [MS-FSA] (summarizing)
 441          * If not data stream, or if sparse, encrypted, compressed...
 442          * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
 443          *
 444          * We'll ignore most of those except to require:
 445          * Plain file, not a stream.
 446          */
 447         if (!smb_node_is_file(ofile->f_node))
 448                 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
 449         if (SMB_IS_STREAM(ofile->f_node))
 450                 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
 451 
 452         /*
 453          * [MS-FSA] If Open.Stream.IsDeleted ...
 454          * We don't really have such a thing.
 455          * Also skip Volume.MaxFileSize check.
 456          */
 457 
 458         /*
 459          * Check for lock conflicting with the write.
 460          */
 461         status = smb_lock_range_access(sr, ofile->f_node,
 462             args.in_dstoff, args.in_xlen, B_TRUE);
 463         if (status != 0)
 464                 return (status); /* == FILE_LOCK_CONFLICT */
 465 
 466         /*
 467          * Need the file size
 468          */
 469         bzero(&dst_attr, sizeof (dst_attr));
 470         dst_attr.sa_mask = SMB_AT_SIZE;
 471         status = smb2_ofile_getattr(sr, ofile, &dst_attr);
 472         if (status != NT_STATUS_SUCCESS)
 473                 return (status);
 474         dst_size = dst_attr.sa_vattr.va_size;
 475 
 476         /*
 477          * Destination offset vs. EOF
 478          */
 479         if (args.in_dstoff >= dst_size)
 480                 return (NT_STATUS_END_OF_FILE);
 481 
 482         /*
 483          * Destination offset+len vs. EOF
 484          *
 485          * The spec. is silent about copying when the file length is
 486          * not block aligned, but clients appear to be OK with our
 487          * returning a non-aligned transfer length at EOF.
 488          * Trim xlen to the remaining (dst) file length.
 489          */
 490         if ((args.in_dstoff + args.in_xlen) > dst_size)
 491                 args.in_xlen = dst_size - args.in_dstoff;
 492 
 493         /*
 494          * Finally, run the I/O
 495          */
 496         switch (tok->tok_type) {
 497         case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
 498                 status = smb2_fsctl_odx_write_zeros(sr, &args);
 499                 break;
 500         case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
 501                 status = smb2_fsctl_odx_write_native1(sr, &args, tok);
 502                 break;
 503         default:
 504                 status = NT_STATUS_INVALID_TOKEN;
 505                 break;
 506         }
 507 
 508 done:
 509         /*
 510          * Checked MaxOutputResp above, so we can ignore errors
 511          * from mbc_encodef here.
 512          */
 513         if (status == NT_STATUS_SUCCESS) {
 514                 (void) smb_mbc_encodef(
 515                     fsctl->out_mbc, "llq",
 516                     args.out_struct_size,
 517                     args.out_flags,
 518                     args.out_xlen);
 519         }
 520 
 521         return (status);
 522 }
 523 
 524 /*
 525  * Handle FSCTL_OFFLOAD_WRITE with token type
 526  * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
 527  *
 528  * In this handler, the "token" represents a source of zeros.
 529  */
 530 static uint32_t
 531 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
 532 {
 533         smb_ofile_t *dst_ofile = sr->fid_ofile;
 534         uint64_t xlen = args->in_xlen;
 535         uint32_t status = 0;
 536         int rc;
 537 
 538         ASSERT(args->in_xlen > 0);
 539 
 540         /*
 541          * Limit the I/O size.  In here we're just doing freesp,
 542          * which is assumed to require only meta-data I/O, so
 543          * we'll allow up to smb2_odx_read_max (256M) per call.
 544          * This is essentially just a double-check of the range
 545          * we gave the client at the offload_read call, making
 546          * sure they can't use a zero token for longer ranges
 547          * than offload_read would allow.
 548          */
 549         if (xlen > smb2_odx_read_max)
 550                 xlen = smb2_odx_read_max;
 551 
 552         /*
 553          * Arrange for zeros to appear in the range:
 554          * in_dstoff, (in_dstoff + in_xlen)
 555          *
 556          * Just "free" the range and let it allocate as needed
 557          * when someone later writes in this range.
 558          */
 559         rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
 560             args->in_dstoff, xlen);
 561         if (rc != 0) {
 562                 status = smb_errno2status(rc);
 563                 if (status == NT_STATUS_INVALID_PARAMETER ||
 564                     status == NT_STATUS_NOT_SUPPORTED)
 565                         status = NT_STATUS_INVALID_DEVICE_REQUEST;
 566         } else {
 567                 args->out_xlen = xlen;
 568                 status = 0;
 569         }
 570 
 571         return (status);
 572 }
 573 
 574 /*
 575  * Handle FSCTL_OFFLOAD_WRITE with token type
 576  * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
 577  */
 578 static uint32_t
 579 smb2_fsctl_odx_write_native1(smb_request_t *sr,
 580     odx_write_args_t *args, smb_odx_token_t *tok)
 581 {
 582         struct tok_native1 *tn1;
 583         smb_ofile_t *dst_ofile = sr->fid_ofile;
 584         smb_ofile_t *src_ofile = NULL;
 585         void *buffer = NULL;
 586         size_t bufsize = smb2_odx_buf_size;
 587         uint64_t src_offset;
 588         uint32_t xlen;
 589         uint32_t status;
 590 
 591         /*
 592          * Lookup the source ofile using the resume key,
 593          * which smb2_fsctl_offload_read encoded as an
 594          * smb2fid_t.  Similar to smb2sr_lookup_fid(),
 595          * but different error code.
 596          */
 597         tn1 = &tok->tok_u.u_tok_native1;
 598         src_ofile = smb_ofile_lookup_by_fid(sr,
 599             (uint16_t)tn1->tn1_fid.temporal);
 600         if (src_ofile == NULL ||
 601             src_ofile->f_persistid != tn1->tn1_fid.persistent) {
 602                 status = NT_STATUS_INVALID_TOKEN;
 603                 goto out;
 604         }
 605 
 606         /*
 607          * Make sure src_ofile is open on a regular file, and
 608          * granted access includes READ_DATA
 609          */
 610         if (!smb_node_is_file(src_ofile->f_node)) {
 611                 status = NT_STATUS_ACCESS_DENIED;
 612                 goto out;
 613         }
 614         status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
 615         if (status != NT_STATUS_SUCCESS)
 616                 goto out;
 617 
 618         /*
 619          * Limit the I/O size.  In here we're actually copying,
 620          * so limit to smb2_odx_write_max (16M) per call.
 621          * Note that xlen is a 32-bit value here.
 622          */
 623         if (args->in_xlen > smb2_odx_write_max)
 624                 xlen = smb2_odx_write_max;
 625         else
 626                 xlen = (uint32_t)args->in_xlen;
 627 
 628         /*
 629          * Note: in_xoff is relative to the beginning of the "token"
 630          * (a range of the source file tn1_off, tn1_eof).  Make sure
 631          * in_xoff is within the range represented by this token.
 632          */
 633         src_offset = tn1->tn1_off + args->in_xoff;
 634         if (src_offset >= tn1->tn1_eof ||
 635             src_offset < tn1->tn1_off) {
 636                 status = NT_STATUS_INVALID_PARAMETER;
 637                 goto out;
 638         }
 639 
 640         /*
 641          * Get a buffer used for copying, always
 642          * smb2_odx_buf_size (1M)
 643          *
 644          * Rather than sleep for this relatively large allocation,
 645          * allow the allocation to fail and return an error.
 646          * The client should then fall back to normal copy.
 647          */
 648         buffer = kmem_alloc(bufsize, KM_NOSLEEP | KM_NORMALPRI);
 649         if (buffer == NULL) {
 650                 status = NT_STATUS_INSUFF_SERVER_RESOURCES;
 651                 goto out;
 652         }
 653 
 654         /*
 655          * Copy src to dst for xlen
 656          *
 657          * Note: caller needs out_xlen set to the amount moved.
 658          * Sparse copy leaves the "resid" in xlen.
 659          */
 660         args->out_xlen = xlen;
 661         status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
 662             src_offset, args->in_dstoff, &xlen, buffer, bufsize);
 663         args->out_xlen -= xlen;
 664 
 665         /*
 666          * If we did any I/O, ignore the error that stopped us.
 667          * We'll report this error during the next call.
 668          */
 669         if (args->out_xlen > 0)
 670                 status = 0;
 671 
 672 out:
 673         if (src_ofile != NULL)
 674                 smb_ofile_release(src_ofile);
 675 
 676         if (buffer != NULL)
 677                 kmem_free(buffer, bufsize);
 678 
 679         return (status);
 680 }
 681 
 682 /*
 683  * Get an smb_odx_token_t from the (input) mbuf chain.
 684  * Consumes exactly TOKEN_TOTAL_SIZE bytes.
 685  */
 686 static int
 687 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
 688 {
 689         mbuf_chain_t tok_mbc;
 690         int start_pos = mbc->chain_offset;
 691         int rc;
 692 
 693         if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
 694                 return (-1);
 695 
 696         /*
 697          * No big-endian support in smb_mbc_encodef, so swap
 698          * the big-endian fields: tok_type (32-bits),
 699          * (reserved is 16-bit zero, so no swap),
 700          * and tok_len (16-bits)
 701          */
 702         rc = smb_mbc_decodef(
 703             mbc, "l..w",
 704             &tok->tok_type,
 705             /* tok_reserved */
 706             &tok->tok_len);
 707         if (rc != 0)
 708                 return (rc);
 709         tok->tok_type = BSWAP_32(tok->tok_type);
 710         tok->tok_len = BSWAP_16(tok->tok_len);
 711 
 712         if (tok->tok_len > TOKEN_MAX_PAYLOAD)
 713                 return (-1);
 714         rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
 715             mbc->chain_offset, tok->tok_len);
 716         if (rc != 0)
 717                 return (rc);
 718 
 719         switch (tok->tok_type) {
 720         case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
 721                 /* no payload */
 722                 break;
 723         case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
 724                 rc = smb_odx_get_token_native1(&tok_mbc,
 725                     &tok->tok_u.u_tok_native1);
 726                 break;
 727         default:
 728                 /* caller will error out */
 729                 break;
 730         }
 731 
 732         if (rc == 0) {
 733                 /* Advance past what we shadowed. */
 734                 mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
 735         }
 736 
 737         return (rc);
 738 }
 739 
 740 static int
 741 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
 742 {
 743         int rc;
 744 
 745         rc = smb_mbc_decodef(
 746             mbc, "qqqq",
 747             &tn1->tn1_fid.persistent,
 748             &tn1->tn1_fid.temporal,
 749             &tn1->tn1_off,
 750             &tn1->tn1_eof);
 751 
 752         return (rc);
 753 }
 754 
 755 /*
 756  * Put an smb_odx_token_t into the (output) mbuf chain,
 757  * padded to TOKEN_TOTAL_SIZE bytes.
 758  */
 759 static int
 760 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
 761 {
 762         int rc, padlen;
 763         int start_pos = mbc->chain_offset;
 764         int end_pos = start_pos + TOKEN_TOTAL_SIZE;
 765 
 766         if (tok->tok_len > TOKEN_MAX_PAYLOAD)
 767                 return (-1);
 768 
 769         /*
 770          * No big-endian support in smb_mbc_encodef, so swap
 771          * the big-endian fields: tok_type (32-bits),
 772          * (reserved is 16-bit zero, so no swap),
 773          * and tok_len (16-bits)
 774          */
 775         rc = smb_mbc_encodef(
 776             mbc, "lww",
 777             BSWAP_32(tok->tok_type),
 778             0, /* tok_reserved */
 779             BSWAP_16(tok->tok_len));
 780         if (rc != 0)
 781                 return (rc);
 782 
 783         switch (tok->tok_type) {
 784         case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
 785                 /* no payload */
 786                 break;
 787         case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
 788                 rc = smb_odx_put_token_native1(mbc,
 789                     &tok->tok_u.u_tok_native1);
 790                 break;
 791         default:
 792                 ASSERT(0);
 793                 return (-1);
 794         }
 795 
 796         /* Pad out to TOKEN_TOTAL_SIZE bytes. */
 797         if (mbc->chain_offset < end_pos) {
 798                 padlen = end_pos - mbc->chain_offset;
 799                 (void) smb_mbc_encodef(mbc, "#.", padlen);
 800         }
 801         ASSERT(mbc->chain_offset == end_pos);
 802 
 803         return (rc);
 804 }
 805 
 806 static int
 807 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
 808 {
 809         int rc;
 810 
 811         rc = smb_mbc_encodef(
 812             mbc, "qqqq",
 813             tn1->tn1_fid.persistent,
 814             tn1->tn1_fid.temporal,
 815             tn1->tn1_off,
 816             tn1->tn1_eof);
 817 
 818         return (rc);
 819 }