1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25  * Copyright 2015, Joyent, Inc.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 
  37 #include <sys/param.h>
  38 #include <sys/isa_defs.h>
  39 #include <sys/types.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/systm.h>
  42 #include <sys/errno.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/mode.h>
  48 #include <sys/proc.h>
  49 #include <sys/filio.h>
  50 #include <sys/share.h>
  51 #include <sys/debug.h>
  52 #include <sys/rctl.h>
  53 #include <sys/nbmlock.h>
  54 
  55 #include <sys/cmn_err.h>
  56 
  57 /* This is global so that it can be used by brand emulation. */
  58 int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
  59 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
  60 static void fd_too_big(proc_t *);
  61 
  62 /*
  63  * File control.
  64  */
  65 int
  66 fcntl(int fdes, int cmd, intptr_t arg)
  67 {
  68         int iarg;
  69         int error = 0;
  70         int retval;
  71         proc_t *p;
  72         file_t *fp;
  73         vnode_t *vp;
  74         u_offset_t offset;
  75         u_offset_t start;
  76         struct vattr vattr;
  77         int in_crit;
  78         int flag;
  79         struct flock sbf;
  80         struct flock64 bf;
  81         struct o_flock obf;
  82         struct flock64_32 bf64_32;
  83         struct fshare fsh;
  84         struct shrlock shr;
  85         struct shr_locowner shr_own;
  86         offset_t maxoffset;
  87         model_t datamodel;
  88         int fdres;
  89 
  90 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
  91         ASSERT(sizeof (struct flock) == sizeof (struct flock32));
  92         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
  93 #endif
  94 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
  95         ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
  96         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
  97 #endif
  98 
  99         /*
 100          * First, for speed, deal with the subset of cases
 101          * that do not require getf() / releasef().
 102          */
 103         switch (cmd) {
 104         case F_GETFD:
 105                 if ((error = f_getfd_error(fdes, &flag)) == 0)
 106                         retval = flag;
 107                 goto out;
 108 
 109         case F_SETFD:
 110                 error = f_setfd_error(fdes, (int)arg);
 111                 retval = 0;
 112                 goto out;
 113 
 114         case F_GETFL:
 115                 if ((error = f_getfl(fdes, &flag)) == 0) {
 116                         retval = (flag & (FMASK | FASYNC));
 117                         if ((flag & (FSEARCH | FEXEC)) == 0)
 118                                 retval += FOPEN;
 119                         else
 120                                 retval |= (flag & (FSEARCH | FEXEC));
 121                 }
 122                 goto out;
 123 
 124         case F_GETXFL:
 125                 if ((error = f_getfl(fdes, &flag)) == 0) {
 126                         retval = flag;
 127                         if ((flag & (FSEARCH | FEXEC)) == 0)
 128                                 retval += FOPEN;
 129                 }
 130                 goto out;
 131 
 132         case F_BADFD:
 133                 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
 134                         retval = fdres;
 135                 goto out;
 136         }
 137 
 138         /*
 139          * Second, for speed, deal with the subset of cases that
 140          * require getf() / releasef() but do not require copyin.
 141          */
 142         if ((fp = getf(fdes)) == NULL) {
 143                 error = EBADF;
 144                 goto out;
 145         }
 146         iarg = (int)arg;
 147 
 148         switch (cmd) {
 149         case F_DUPFD:
 150         case F_DUPFD_CLOEXEC:
 151                 p = curproc;
 152                 if ((uint_t)iarg >= p->p_fno_ctl) {
 153                         if (iarg >= 0)
 154                                 fd_too_big(p);
 155                         error = EINVAL;
 156                         goto done;
 157                 }
 158                 /*
 159                  * We need to increment the f_count reference counter
 160                  * before allocating a new file descriptor.
 161                  * Doing it other way round opens a window for race condition
 162                  * with closeandsetf() on the target file descriptor which can
 163                  * close the file still referenced by the original
 164                  * file descriptor.
 165                  */
 166                 mutex_enter(&fp->f_tlock);
 167                 fp->f_count++;
 168                 mutex_exit(&fp->f_tlock);
 169                 if ((retval = ufalloc_file(iarg, fp)) == -1) {
 170                         /*
 171                          * New file descriptor can't be allocated.
 172                          * Revert the reference count.
 173                          */
 174                         mutex_enter(&fp->f_tlock);
 175                         fp->f_count--;
 176                         mutex_exit(&fp->f_tlock);
 177                         error = EMFILE;
 178                 } else {
 179                         if (cmd == F_DUPFD_CLOEXEC) {
 180                                 f_setfd(retval, FD_CLOEXEC);
 181                         }
 182                 }
 183                 goto done;
 184 
 185         case F_DUP2FD_CLOEXEC:
 186                 if (fdes == iarg) {
 187                         error = EINVAL;
 188                         goto done;
 189                 }
 190 
 191                 /*FALLTHROUGH*/
 192 
 193         case F_DUP2FD:
 194                 p = curproc;
 195                 if (fdes == iarg) {
 196                         retval = iarg;
 197                 } else if ((uint_t)iarg >= p->p_fno_ctl) {
 198                         if (iarg >= 0)
 199                                 fd_too_big(p);
 200                         error = EBADF;
 201                 } else {
 202                         /*
 203                          * We can't hold our getf(fdes) across the call to
 204                          * closeandsetf() because it creates a window for
 205                          * deadlock: if one thread is doing dup2(a, b) while
 206                          * another is doing dup2(b, a), each one will block
 207                          * waiting for the other to call releasef().  The
 208                          * solution is to increment the file reference count
 209                          * (which we have to do anyway), then releasef(fdes),
 210                          * then closeandsetf().  Incrementing f_count ensures
 211                          * that fp won't disappear after we call releasef().
                         * When closeandsetf() fails, we try to avoid calling
                         * closef() because of all the side effects.
 214                          */
 215                         mutex_enter(&fp->f_tlock);
 216                         fp->f_count++;
 217                         mutex_exit(&fp->f_tlock);
 218                         releasef(fdes);
 219                         if ((error = closeandsetf(iarg, fp)) == 0) {
 220                                 if (cmd == F_DUP2FD_CLOEXEC) {
 221                                         f_setfd(iarg, FD_CLOEXEC);
 222                                 }
 223                                 retval = iarg;
 224                         } else {
 225                                 mutex_enter(&fp->f_tlock);
 226                                 if (fp->f_count > 1) {
 227                                         fp->f_count--;
 228                                         mutex_exit(&fp->f_tlock);
 229                                 } else {
 230                                         mutex_exit(&fp->f_tlock);
 231                                         (void) closef(fp);
 232                                 }
 233                         }
 234                         goto out;
 235                 }
 236                 goto done;
 237 
 238         case F_SETFL:
 239                 vp = fp->f_vnode;
 240                 flag = fp->f_flag;
 241                 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
 242                         iarg &= ~FNDELAY;
 243                 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
 244                     0) {
 245                         iarg &= FMASK;
 246                         mutex_enter(&fp->f_tlock);
 247                         fp->f_flag &= ~FMASK | (FREAD|FWRITE);
 248                         fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
 249                         mutex_exit(&fp->f_tlock);
 250                 }
 251                 retval = 0;
 252                 goto done;
 253         }
 254 
 255         /*
 256          * Finally, deal with the expensive cases.
 257          */
 258         retval = 0;
 259         in_crit = 0;
 260         maxoffset = MAXOFF_T;
 261         datamodel = DATAMODEL_NATIVE;
 262 #if defined(_SYSCALL32_IMPL)
 263         if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
 264                 maxoffset = MAXOFF32_T;
 265 #endif
 266 
 267         vp = fp->f_vnode;
 268         flag = fp->f_flag;
 269         offset = fp->f_offset;
 270 
 271         switch (cmd) {
 272         /*
 273          * The file system and vnode layers understand and implement
 274          * locking with flock64 structures. So here once we pass through
 275          * the test for compatibility as defined by LFS API, (for F_SETLK,
 276          * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
 277          * F_FREESP) we transform the flock structure to a flock64 structure
 278          * and send it to the lower layers. Similarly in case of GETLK and
 279          * OFD_GETLK the returned flock64 structure is transformed to a flock
 280          * structure if everything fits in nicely, otherwise we return
 281          * EOVERFLOW.
 282          */
 283 
 284         case F_GETLK:
 285         case F_O_GETLK:
 286         case F_SETLK:
 287         case F_SETLKW:
 288         case F_SETLK_NBMAND:
 289         case F_OFD_GETLK:
 290         case F_OFD_SETLK:
 291         case F_OFD_SETLKW:
 292         case F_FLOCK:
 293         case F_FLOCKW:
 294 
 295                 /*
 296                  * Copy in input fields only.
 297                  */
 298 
 299                 if (cmd == F_O_GETLK) {
 300                         if (datamodel != DATAMODEL_ILP32) {
 301                                 error = EINVAL;
 302                                 break;
 303                         }
 304 
 305                         if (copyin((void *)arg, &obf, sizeof (obf))) {
 306                                 error = EFAULT;
 307                                 break;
 308                         }
 309                         bf.l_type = obf.l_type;
 310                         bf.l_whence = obf.l_whence;
 311                         bf.l_start = (off64_t)obf.l_start;
 312                         bf.l_len = (off64_t)obf.l_len;
 313                         bf.l_sysid = (int)obf.l_sysid;
 314                         bf.l_pid = obf.l_pid;
 315                 } else if (datamodel == DATAMODEL_NATIVE) {
 316                         if (copyin((void *)arg, &sbf, sizeof (sbf))) {
 317                                 error = EFAULT;
 318                                 break;
 319                         }
 320                         /*
 321                          * XXX  In an LP64 kernel with an LP64 application
 322                          *      there's no need to do a structure copy here
 323                          *      struct flock == struct flock64. However,
 324                          *      we did it this way to avoid more conditional
 325                          *      compilation.
 326                          */
 327                         bf.l_type = sbf.l_type;
 328                         bf.l_whence = sbf.l_whence;
 329                         bf.l_start = (off64_t)sbf.l_start;
 330                         bf.l_len = (off64_t)sbf.l_len;
 331                         bf.l_sysid = sbf.l_sysid;
 332                         bf.l_pid = sbf.l_pid;
 333                 }
 334 #if defined(_SYSCALL32_IMPL)
 335                 else {
 336                         struct flock32 sbf32;
 337                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 338                                 error = EFAULT;
 339                                 break;
 340                         }
 341                         bf.l_type = sbf32.l_type;
 342                         bf.l_whence = sbf32.l_whence;
 343                         bf.l_start = (off64_t)sbf32.l_start;
 344                         bf.l_len = (off64_t)sbf32.l_len;
 345                         bf.l_sysid = sbf32.l_sysid;
 346                         bf.l_pid = sbf32.l_pid;
 347                 }
 348 #endif /* _SYSCALL32_IMPL */
 349 
 350                 /*
 351                  * 64-bit support: check for overflow for 32-bit lock ops
 352                  */
 353                 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
 354                         break;
 355 
 356                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 357                         /* FLOCK* locking is always over the entire file. */
 358                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 359                             bf.l_len != 0) {
 360                                 error = EINVAL;
 361                                 break;
 362                         }
 363                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 364                                 error = EINVAL;
 365                                 break;
 366                         }
 367                 }
 368 
 369                 if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
 370                     cmd == F_OFD_SETLKW) {
 371                         /*
 372                          * TBD OFD-style locking is currently limited to
 373                          * covering the entire file.
 374                          */
 375                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 376                             bf.l_len != 0) {
 377                                 error = EINVAL;
 378                                 break;
 379                         }
 380                 }
 381 
 382                 /*
 383                  * Not all of the filesystems understand F_O_GETLK, and
 384                  * there's no need for them to know.  Map it to F_GETLK.
 385                  *
 386                  * The *_frlock functions in the various file systems basically
 387                  * do some validation and then funnel everything through the
 388                  * fs_frlock function. For OFD-style locks fs_frlock will do
 389                  * nothing so that once control returns here we can call the
 390                  * ofdlock function with the correct fp. For OFD-style locks
 391                  * the unsupported remote file systems, such as NFS, detect and
 392                  * reject the OFD-style cmd argument.
 393                  */
 394                 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
 395                     &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
 396                         break;
 397 
 398                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 399                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 400                         /*
 401                          * This is an OFD-style lock so we need to handle it
 402                          * here. Because OFD-style locks are associated with
 403                          * the file_t we didn't have enough info down the
 404                          * VOP_FRLOCK path immediately above.
 405                          */
 406                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 407                                 break;
 408                 }
 409 
 410                 /*
 411                  * If command is GETLK and no lock is found, only
 412                  * the type field is changed.
 413                  */
 414                 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
 415                     cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
 416                         /* l_type always first entry, always a short */
 417                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 418                             sizeof (bf.l_type)))
 419                                 error = EFAULT;
 420                         break;
 421                 }
 422 
 423                 if (cmd == F_O_GETLK) {
 424                         /*
 425                          * Return an SVR3 flock structure to the user.
 426                          */
 427                         obf.l_type = (int16_t)bf.l_type;
 428                         obf.l_whence = (int16_t)bf.l_whence;
 429                         obf.l_start = (int32_t)bf.l_start;
 430                         obf.l_len = (int32_t)bf.l_len;
 431                         if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
 432                                 /*
 433                                  * One or both values for the above fields
 434                                  * is too large to store in an SVR3 flock
 435                                  * structure.
 436                                  */
 437                                 error = EOVERFLOW;
 438                                 break;
 439                         }
 440                         obf.l_sysid = (int16_t)bf.l_sysid;
 441                         obf.l_pid = (int16_t)bf.l_pid;
 442                         if (copyout(&obf, (void *)arg, sizeof (obf)))
 443                                 error = EFAULT;
 444                 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 445                         /*
 446                          * Copy out SVR4 flock.
 447                          */
 448                         int i;
 449 
 450                         if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
 451                                 error = EOVERFLOW;
 452                                 break;
 453                         }
 454 
 455                         if (datamodel == DATAMODEL_NATIVE) {
 456                                 for (i = 0; i < 4; i++)
 457                                         sbf.l_pad[i] = 0;
 458                                 /*
 459                                  * XXX  In an LP64 kernel with an LP64
 460                                  *      application there's no need to do a
 461                                  *      structure copy here as currently
 462                                  *      struct flock == struct flock64.
 463                                  *      We did it this way to avoid more
 464                                  *      conditional compilation.
 465                                  */
 466                                 sbf.l_type = bf.l_type;
 467                                 sbf.l_whence = bf.l_whence;
 468                                 sbf.l_start = (off_t)bf.l_start;
 469                                 sbf.l_len = (off_t)bf.l_len;
 470                                 sbf.l_sysid = bf.l_sysid;
 471                                 sbf.l_pid = bf.l_pid;
 472                                 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
 473                                         error = EFAULT;
 474                         }
 475 #if defined(_SYSCALL32_IMPL)
 476                         else {
 477                                 struct flock32 sbf32;
 478                                 if (bf.l_start > MAXOFF32_T ||
 479                                     bf.l_len > MAXOFF32_T) {
 480                                         error = EOVERFLOW;
 481                                         break;
 482                                 }
 483                                 for (i = 0; i < 4; i++)
 484                                         sbf32.l_pad[i] = 0;
 485                                 sbf32.l_type = (int16_t)bf.l_type;
 486                                 sbf32.l_whence = (int16_t)bf.l_whence;
 487                                 sbf32.l_start = (off32_t)bf.l_start;
 488                                 sbf32.l_len = (off32_t)bf.l_len;
 489                                 sbf32.l_sysid = (int32_t)bf.l_sysid;
 490                                 sbf32.l_pid = (pid32_t)bf.l_pid;
 491                                 if (copyout(&sbf32,
 492                                     (void *)arg, sizeof (sbf32)))
 493                                         error = EFAULT;
 494                         }
 495 #endif
 496                 }
 497                 break;
 498 
 499         case F_CHKFL:
 500                 /*
 501                  * This is for internal use only, to allow the vnode layer
 502                  * to validate a flags setting before applying it.  User
 503                  * programs can't issue it.
 504                  */
 505                 error = EINVAL;
 506                 break;
 507 
 508         case F_ALLOCSP:
 509         case F_FREESP:
 510         case F_ALLOCSP64:
 511         case F_FREESP64:
 512                 /*
 513                  * Test for not-a-regular-file (and returning EINVAL)
 514                  * before testing for open-for-writing (and returning EBADF).
 515                  * This is relied upon by posix_fallocate() in libc.
 516                  */
 517                 if (vp->v_type != VREG) {
 518                         error = EINVAL;
 519                         break;
 520                 }
 521 
 522                 if ((flag & FWRITE) == 0) {
 523                         error = EBADF;
 524                         break;
 525                 }
 526 
 527                 if (datamodel != DATAMODEL_ILP32 &&
 528                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 529                         error = EINVAL;
 530                         break;
 531                 }
 532 
 533 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
 534                 if (datamodel == DATAMODEL_ILP32 &&
 535                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 536                         struct flock32 sbf32;
 537                         /*
 538                          * For compatibility we overlay an SVR3 flock on an SVR4
 539                          * flock.  This works because the input field offsets
 540                          * in "struct flock" were preserved.
 541                          */
 542                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 543                                 error = EFAULT;
 544                                 break;
 545                         } else {
 546                                 bf.l_type = sbf32.l_type;
 547                                 bf.l_whence = sbf32.l_whence;
 548                                 bf.l_start = (off64_t)sbf32.l_start;
 549                                 bf.l_len = (off64_t)sbf32.l_len;
 550                                 bf.l_sysid = sbf32.l_sysid;
 551                                 bf.l_pid = sbf32.l_pid;
 552                         }
 553                 }
 554 #endif /* _ILP32 || _SYSCALL32_IMPL */
 555 
 556 #if defined(_LP64)
 557                 if (datamodel == DATAMODEL_LP64 &&
 558                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 559                         if (copyin((void *)arg, &bf, sizeof (bf))) {
 560                                 error = EFAULT;
 561                                 break;
 562                         }
 563                 }
 564 #endif /* defined(_LP64) */
 565 
 566 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 567                 if (datamodel == DATAMODEL_ILP32 &&
 568                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 569                         if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 570                                 error = EFAULT;
 571                                 break;
 572                         } else {
 573                                 /*
 574                                  * Note that the size of flock64 is different in
 575                                  * the ILP32 and LP64 models, due to the l_pad
 576                                  * field. We do not want to assume that the
 577                                  * flock64 structure is laid out the same in
 578                                  * ILP32 and LP64 environments, so we will
 579                                  * copy in the ILP32 version of flock64
 580                                  * explicitly and copy it to the native
 581                                  * flock64 structure.
 582                                  */
 583                                 bf.l_type = (short)bf64_32.l_type;
 584                                 bf.l_whence = (short)bf64_32.l_whence;
 585                                 bf.l_start = bf64_32.l_start;
 586                                 bf.l_len = bf64_32.l_len;
 587                                 bf.l_sysid = (int)bf64_32.l_sysid;
 588                                 bf.l_pid = (pid_t)bf64_32.l_pid;
 589                         }
 590                 }
 591 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 592 
 593                 if (cmd == F_ALLOCSP || cmd == F_FREESP)
 594                         error = flock_check(vp, &bf, offset, maxoffset);
 595                 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
 596                         error = flock_check(vp, &bf, offset, MAXOFFSET_T);
 597                 if (error)
 598                         break;
 599 
 600                 if (vp->v_type == VREG && bf.l_len == 0 &&
 601                     bf.l_start > OFFSET_MAX(fp)) {
 602                         error = EFBIG;
 603                         break;
 604                 }
 605 
 606                 /*
 607                  * Make sure that there are no conflicting non-blocking
 608                  * mandatory locks in the region being manipulated. If
 609                  * there are such locks then return EACCES.
 610                  */
 611                 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
 612                         break;
 613 
 614                 if (nbl_need_check(vp)) {
 615                         u_offset_t      begin;
 616                         ssize_t         length;
 617 
 618                         nbl_start_crit(vp, RW_READER);
 619                         in_crit = 1;
 620                         vattr.va_mask = AT_SIZE;
 621                         if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 622                             != 0)
 623                                 break;
 624                         begin = start > vattr.va_size ? vattr.va_size : start;
 625                         length = vattr.va_size > start ? vattr.va_size - start :
 626                             start - vattr.va_size;
 627                         if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
 628                             NULL)) {
 629                                 error = EACCES;
 630                                 break;
 631                         }
 632                 }
 633 
 634                 if (cmd == F_ALLOCSP64)
 635                         cmd = F_ALLOCSP;
 636                 else if (cmd == F_FREESP64)
 637                         cmd = F_FREESP;
 638 
 639                 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
 640 
 641                 break;
 642 
 643 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 644         case F_GETLK64:
 645         case F_SETLK64:
 646         case F_SETLKW64:
 647         case F_SETLK64_NBMAND:
 648         case F_OFD_GETLK64:
 649         case F_OFD_SETLK64:
 650         case F_OFD_SETLKW64:
 651         case F_FLOCK64:
 652         case F_FLOCKW64:
 653                 /*
 654                  * Large Files: Here we set cmd as *LK and send it to
 655                  * lower layers. *LK64 is only for the user land.
 656                  * Most of the comments described above for F_SETLK
 657                  * applies here too.
 658                  * Large File support is only needed for ILP32 apps!
 659                  */
 660                 if (datamodel != DATAMODEL_ILP32) {
 661                         error = EINVAL;
 662                         break;
 663                 }
 664 
 665                 if (cmd == F_GETLK64)
 666                         cmd = F_GETLK;
 667                 else if (cmd == F_SETLK64)
 668                         cmd = F_SETLK;
 669                 else if (cmd == F_SETLKW64)
 670                         cmd = F_SETLKW;
 671                 else if (cmd == F_SETLK64_NBMAND)
 672                         cmd = F_SETLK_NBMAND;
 673                 else if (cmd == F_OFD_GETLK64)
 674                         cmd = F_OFD_GETLK;
 675                 else if (cmd == F_OFD_SETLK64)
 676                         cmd = F_OFD_SETLK;
 677                 else if (cmd == F_OFD_SETLKW64)
 678                         cmd = F_OFD_SETLKW;
 679                 else if (cmd == F_FLOCK64)
 680                         cmd = F_FLOCK;
 681                 else if (cmd == F_FLOCKW64)
 682                         cmd = F_FLOCKW;
 683 
 684                 /*
 685                  * Note that the size of flock64 is different in the ILP32
 686                  * and LP64 models, due to the sucking l_pad field.
                 * We do not want to assume that the flock64 structure is
                 * laid out the same in ILP32 and LP64 environments, so
                 * we will copy in the ILP32 version of flock64 explicitly
 690                  * and copy it to the native flock64 structure.
 691                  */
 692 
 693                 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 694                         error = EFAULT;
 695                         break;
 696                 }
 697 
 698                 bf.l_type = (short)bf64_32.l_type;
 699                 bf.l_whence = (short)bf64_32.l_whence;
 700                 bf.l_start = bf64_32.l_start;
 701                 bf.l_len = bf64_32.l_len;
 702                 bf.l_sysid = (int)bf64_32.l_sysid;
 703                 bf.l_pid = (pid_t)bf64_32.l_pid;
 704 
 705                 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
 706                         break;
 707 
 708                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 709                         /* FLOCK* locking is always over the entire file. */
 710                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 711                             bf.l_len != 0) {
 712                                 error = EINVAL;
 713                                 break;
 714                         }
 715                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 716                                 error = EINVAL;
 717                                 break;
 718                         }
 719                 }
 720 
 721                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 722                         /*
 723                          * TBD OFD-style locking is currently limited to
 724                          * covering the entire file.
 725                          */
 726                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 727                             bf.l_len != 0) {
 728                                 error = EINVAL;
 729                                 break;
 730                         }
 731                 }
 732 
 733                 /*
 734                  * The *_frlock functions in the various file systems basically
 735                  * do some validation and then funnel everything through the
 736                  * fs_frlock function. For OFD-style locks fs_frlock will do
 737                  * nothing so that once control returns here we can call the
 738                  * ofdlock function with the correct fp. For OFD-style locks
 739                  * the unsupported remote file systems, such as NFS, detect and
 740                  * reject the OFD-style cmd argument.
 741                  */
 742                 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
 743                     NULL, fp->f_cred, NULL)) != 0)
 744                         break;
 745 
 746                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 747                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 748                         /*
 749                          * This is an OFD-style lock so we need to handle it
 750                          * here. Because OFD-style locks are associated with
 751                          * the file_t we didn't have enough info down the
 752                          * VOP_FRLOCK path immediately above.
 753                          */
 754                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 755                                 break;
 756                 }
 757 
 758                 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
 759                     bf.l_type == F_UNLCK) {
 760                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 761                             sizeof (bf.l_type)))
 762                                 error = EFAULT;
 763                         break;
 764                 }
 765 
 766                 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 767                         int i;
 768 
 769                         /*
 770                          * We do not want to assume that the flock64 structure
 771                          * is laid out in the same in ILP32 and LP64
 772                          * environments, so we will copy out the ILP32 version
 773                          * of flock64 explicitly after copying the native
 774                          * flock64 structure to it.
 775                          */
 776                         for (i = 0; i < 4; i++)
 777                                 bf64_32.l_pad[i] = 0;
 778                         bf64_32.l_type = (int16_t)bf.l_type;
 779                         bf64_32.l_whence = (int16_t)bf.l_whence;
 780                         bf64_32.l_start = bf.l_start;
 781                         bf64_32.l_len = bf.l_len;
 782                         bf64_32.l_sysid = (int32_t)bf.l_sysid;
 783                         bf64_32.l_pid = (pid32_t)bf.l_pid;
 784                         if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
 785                                 error = EFAULT;
 786                 }
 787                 break;
 788 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 789 
 790         case F_SHARE:
 791         case F_SHARE_NBMAND:
 792         case F_UNSHARE:
 793 
 794                 /*
 795                  * Copy in input fields only.
 796                  */
 797                 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
 798                         error = EFAULT;
 799                         break;
 800                 }
 801 
 802                 /*
 803                  * Local share reservations always have this simple form
 804                  */
 805                 shr.s_access = fsh.f_access;
 806                 shr.s_deny = fsh.f_deny;
 807                 shr.s_sysid = 0;
 808                 shr.s_pid = ttoproc(curthread)->p_pid;
 809                 shr_own.sl_pid = shr.s_pid;
 810                 shr_own.sl_id = fsh.f_id;
 811                 shr.s_own_len = sizeof (shr_own);
 812                 shr.s_owner = (caddr_t)&shr_own;
 813                 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
 814                 break;
 815 
 816         default:
 817                 error = EINVAL;
 818                 break;
 819         }
 820 
 821         if (in_crit)
 822                 nbl_end_crit(vp);
 823 
 824 done:
 825         releasef(fdes);
 826 out:
 827         if (error)
 828                 return (set_errno(error));
 829         return (retval);
 830 }
 831 
 832 int
 833 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
 834 {
 835         struct vattr    vattr;
 836         int     error;
 837         u_offset_t start, end;
 838 
 839         /*
 840          * Determine the starting point of the request
 841          */
 842         switch (flp->l_whence) {
 843         case 0:         /* SEEK_SET */
 844                 start = (u_offset_t)flp->l_start;
 845                 if (start > max)
 846                         return (EINVAL);
 847                 break;
 848         case 1:         /* SEEK_CUR */
 849                 if (flp->l_start > (max - offset))
 850                         return (EOVERFLOW);
 851                 start = (u_offset_t)(flp->l_start + offset);
 852                 if (start > max)
 853                         return (EINVAL);
 854                 break;
 855         case 2:         /* SEEK_END */
 856                 vattr.va_mask = AT_SIZE;
 857                 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 858                         return (error);
 859                 if (flp->l_start > (max - (offset_t)vattr.va_size))
 860                         return (EOVERFLOW);
 861                 start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 862                 if (start > max)
 863                         return (EINVAL);
 864                 break;
 865         default:
 866                 return (EINVAL);
 867         }
 868 
 869         /*
 870          * Determine the range covered by the request.
 871          */
 872         if (flp->l_len == 0)
 873                 end = MAXEND;
 874         else if ((offset_t)flp->l_len > 0) {
 875                 if (flp->l_len > (max - start + 1))
 876                         return (EOVERFLOW);
 877                 end = (u_offset_t)(start + (flp->l_len - 1));
 878                 ASSERT(end <= max);
 879         } else {
 880                 /*
 881                  * Negative length; why do we even allow this ?
 882                  * Because this allows easy specification of
 883                  * the last n bytes of the file.
 884                  */
 885                 end = start;
 886                 start += (u_offset_t)flp->l_len;
 887                 (start)++;
 888                 if (start > max)
 889                         return (EINVAL);
 890                 ASSERT(end <= max);
 891         }
 892         ASSERT(start <= max);
 893         if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
 894             end == (offset_t)max) {
 895                 flp->l_len = 0;
 896         }
 897         if (start  > end)
 898                 return (EINVAL);
 899         return (0);
 900 }
 901 
 902 static int
 903 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
 904 {
 905         struct vattr    vattr;
 906         int     error;
 907 
 908         /*
 909          * Determine the starting point of the request. Assume that it is
 910          * a valid starting point.
 911          */
 912         switch (flp->l_whence) {
 913         case 0:         /* SEEK_SET */
 914                 *start = (u_offset_t)flp->l_start;
 915                 break;
 916         case 1:         /* SEEK_CUR */
 917                 *start = (u_offset_t)(flp->l_start + offset);
 918                 break;
 919         case 2:         /* SEEK_END */
 920                 vattr.va_mask = AT_SIZE;
 921                 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 922                         return (error);
 923                 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 924                 break;
 925         default:
 926                 return (EINVAL);
 927         }
 928 
 929         return (0);
 930 }
 931 
 932 /*
 933  * Take rctl action when the requested file descriptor is too big.
 934  */
 935 static void
 936 fd_too_big(proc_t *p)
 937 {
 938         mutex_enter(&p->p_lock);
 939         (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
 940             p->p_rctls, p, RCA_SAFE);
 941         mutex_exit(&p->p_lock);
 942 }