io-lx-public Wdiff usr/src/uts/common/syscall/fcntl.c

Print this page

OS-5591 Double flock(3C) causes undue block
OS-5585 fcntl(F_OFD_GETLK) should return EINVAL on bad parameters
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
OS-4098 move open, close and fcntl into the kernel

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/syscall/fcntl.c
          +++ new/usr/src/uts/common/syscall/fcntl.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25   25   * Copyright 2015, Joyent, Inc.
  26   26   */
  27   27  
  28   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29   29  /*        All Rights Reserved   */
  30   30  
  31   31  /*
  32   32   * Portions of this source code were derived from Berkeley 4.3 BSD
  33   33   * under license from the Regents of the University of California.
  34   34   */
  35   35  
  36   36  
  37   37  #include <sys/param.h>
  38   38  #include <sys/isa_defs.h>
  39   39  #include <sys/types.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/systm.h>
  42   42  #include <sys/errno.h>
  43   43  #include <sys/fcntl.h>
  44   44  #include <sys/flock.h>
  45   45  #include <sys/vnode.h>
  46   46  #include <sys/file.h>

↓ open down ↓

46 lines elided

↑ open up ↑

  47   47  #include <sys/mode.h>
  48   48  #include <sys/proc.h>
  49   49  #include <sys/filio.h>
  50   50  #include <sys/share.h>
  51   51  #include <sys/debug.h>
  52   52  #include <sys/rctl.h>
  53   53  #include <sys/nbmlock.h>
  54   54  
  55   55  #include <sys/cmn_err.h>
  56   56  
  57      -static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
       57 +/* This is global so that it can be used by brand emulation. */
       58 +int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
  58   59  static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
  59   60  static void fd_too_big(proc_t *);
  60   61  
  61   62  /*
  62   63   * File control.
  63   64   */
  64   65  int
  65   66  fcntl(int fdes, int cmd, intptr_t arg)
  66   67  {
  67   68          int iarg;

  68   69          int error = 0;
  69   70          int retval;
  70   71          proc_t *p;
  71   72          file_t *fp;
  72   73          vnode_t *vp;
  73   74          u_offset_t offset;
  74   75          u_offset_t start;
  75   76          struct vattr vattr;
  76   77          int in_crit;
  77   78          int flag;
  78   79          struct flock sbf;
  79   80          struct flock64 bf;
  80   81          struct o_flock obf;
  81   82          struct flock64_32 bf64_32;
  82   83          struct fshare fsh;
  83   84          struct shrlock shr;
  84   85          struct shr_locowner shr_own;
  85   86          offset_t maxoffset;
  86   87          model_t datamodel;
  87   88          int fdres;
  88   89  
  89   90  #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
  90   91          ASSERT(sizeof (struct flock) == sizeof (struct flock32));
  91   92          ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
  92   93  #endif
  93   94  #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
  94   95          ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
  95   96          ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
  96   97  #endif
  97   98  
  98   99          /*
  99  100           * First, for speed, deal with the subset of cases
 100  101           * that do not require getf() / releasef().
 101  102           */
 102  103          switch (cmd) {
 103  104          case F_GETFD:
 104  105                  if ((error = f_getfd_error(fdes, &flag)) == 0)
 105  106                          retval = flag;
 106  107                  goto out;
 107  108  
 108  109          case F_SETFD:
 109  110                  error = f_setfd_error(fdes, (int)arg);
 110  111                  retval = 0;
 111  112                  goto out;
 112  113  
 113  114          case F_GETFL:
 114  115                  if ((error = f_getfl(fdes, &flag)) == 0) {
 115  116                          retval = (flag & (FMASK | FASYNC));
 116  117                          if ((flag & (FSEARCH | FEXEC)) == 0)
 117  118                                  retval += FOPEN;
 118  119                          else
 119  120                                  retval |= (flag & (FSEARCH | FEXEC));
 120  121                  }
 121  122                  goto out;
 122  123  
 123  124          case F_GETXFL:
 124  125                  if ((error = f_getfl(fdes, &flag)) == 0) {
 125  126                          retval = flag;
 126  127                          if ((flag & (FSEARCH | FEXEC)) == 0)
 127  128                                  retval += FOPEN;
 128  129                  }
 129  130                  goto out;
 130  131  
 131  132          case F_BADFD:
 132  133                  if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
 133  134                          retval = fdres;
 134  135                  goto out;
 135  136          }
 136  137  
 137  138          /*
 138  139           * Second, for speed, deal with the subset of cases that
 139  140           * require getf() / releasef() but do not require copyin.
 140  141           */
 141  142          if ((fp = getf(fdes)) == NULL) {
 142  143                  error = EBADF;
 143  144                  goto out;
 144  145          }
 145  146          iarg = (int)arg;
 146  147  
 147  148          switch (cmd) {
 148  149          case F_DUPFD:
 149  150          case F_DUPFD_CLOEXEC:
 150  151                  p = curproc;
 151  152                  if ((uint_t)iarg >= p->p_fno_ctl) {
 152  153                          if (iarg >= 0)
 153  154                                  fd_too_big(p);
 154  155                          error = EINVAL;
 155  156                          goto done;
 156  157                  }
 157  158                  /*
 158  159                   * We need to increment the f_count reference counter
 159  160                   * before allocating a new file descriptor.
 160  161                   * Doing it the other way round opens a window for a race
 161  162                   * condition with closeandsetf() on the target file descriptor,
 162  163                   * which can close the file still referenced by the original
 163  164                   * file descriptor.
 164  165                   */
 165  166                  mutex_enter(&fp->f_tlock);
 166  167                  fp->f_count++;
 167  168                  mutex_exit(&fp->f_tlock);
 168  169                  if ((retval = ufalloc_file(iarg, fp)) == -1) {
 169  170                          /*
 170  171                           * New file descriptor can't be allocated.
 171  172                           * Revert the reference count.
 172  173                           */
 173  174                          mutex_enter(&fp->f_tlock);
 174  175                          fp->f_count--;
 175  176                          mutex_exit(&fp->f_tlock);
 176  177                          error = EMFILE;
 177  178                  } else {
 178  179                          if (cmd == F_DUPFD_CLOEXEC) {
 179  180                                  f_setfd(retval, FD_CLOEXEC);
 180  181                          }
 181  182                  }
 182  183                  goto done;
 183  184  
 184  185          case F_DUP2FD_CLOEXEC:
 185  186                  if (fdes == iarg) {
 186  187                          error = EINVAL;
 187  188                          goto done;
 188  189                  }
 189  190  
 190  191                  /*FALLTHROUGH*/
 191  192  
 192  193          case F_DUP2FD:
 193  194                  p = curproc;
 194  195                  if (fdes == iarg) {
 195  196                          retval = iarg;
 196  197                  } else if ((uint_t)iarg >= p->p_fno_ctl) {
 197  198                          if (iarg >= 0)
 198  199                                  fd_too_big(p);
 199  200                          error = EBADF;
 200  201                  } else {
 201  202                          /*
 202  203                           * We can't hold our getf(fdes) across the call to
 203  204                           * closeandsetf() because it creates a window for
 204  205                           * deadlock: if one thread is doing dup2(a, b) while
 205  206                           * another is doing dup2(b, a), each one will block
 206  207                           * waiting for the other to call releasef().  The
 207  208                           * solution is to increment the file reference count
 208  209                           * (which we have to do anyway), then releasef(fdes),
 209  210                           * then closeandsetf().  Incrementing f_count ensures
 210  211                           * that fp won't disappear after we call releasef().
 211  212                           * When closeandsetf() fails, we try to avoid calling
 212  213                           * closef() because of all the side effects.
 213  214                           */
 214  215                          mutex_enter(&fp->f_tlock);
 215  216                          fp->f_count++;
 216  217                          mutex_exit(&fp->f_tlock);
 217  218                          releasef(fdes);
 218  219                          if ((error = closeandsetf(iarg, fp)) == 0) {
 219  220                                  if (cmd == F_DUP2FD_CLOEXEC) {
 220  221                                          f_setfd(iarg, FD_CLOEXEC);
 221  222                                  }
 222  223                                  retval = iarg;
 223  224                          } else {
 224  225                                  mutex_enter(&fp->f_tlock);
 225  226                                  if (fp->f_count > 1) {
 226  227                                          fp->f_count--;
 227  228                                          mutex_exit(&fp->f_tlock);
 228  229                                  } else {
 229  230                                          mutex_exit(&fp->f_tlock);
 230  231                                          (void) closef(fp);
 231  232                                  }
 232  233                          }
 233  234                          goto out;
 234  235                  }
 235  236                  goto done;
 236  237  
 237  238          case F_SETFL:
 238  239                  vp = fp->f_vnode;
 239  240                  flag = fp->f_flag;
 240  241                  if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
 241  242                          iarg &= ~FNDELAY;
 242  243                  if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
 243  244                      0) {
 244  245                          iarg &= FMASK;
 245  246                          mutex_enter(&fp->f_tlock);
 246  247                          fp->f_flag &= ~FMASK | (FREAD|FWRITE);
 247  248                          fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
 248  249                          mutex_exit(&fp->f_tlock);
 249  250                  }
 250  251                  retval = 0;
 251  252                  goto done;
 252  253          }
 253  254  
 254  255          /*
 255  256           * Finally, deal with the expensive cases.
 256  257           */
 257  258          retval = 0;
 258  259          in_crit = 0;
 259  260          maxoffset = MAXOFF_T;
 260  261          datamodel = DATAMODEL_NATIVE;
 261  262  #if defined(_SYSCALL32_IMPL)
 262  263          if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
 263  264                  maxoffset = MAXOFF32_T;
 264  265  #endif
 265  266  
 266  267          vp = fp->f_vnode;
 267  268          flag = fp->f_flag;
 268  269          offset = fp->f_offset;
 269  270  
 270  271          switch (cmd) {
 271  272          /*
 272  273           * The file system and vnode layers understand and implement
 273  274           * locking with flock64 structures. So here once we pass through
 274  275           * the test for compatibility as defined by LFS API, (for F_SETLK,
 275  276           * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
 276  277           * F_FREESP) we transform the flock structure to a flock64 structure
 277  278           * and send it to the lower layers. Similarly in case of GETLK and
 278  279           * OFD_GETLK the returned flock64 structure is transformed to a flock
 279  280           * structure if everything fits in nicely, otherwise we return
 280  281           * EOVERFLOW.
 281  282           */
 282  283  
 283  284          case F_GETLK:
 284  285          case F_O_GETLK:
 285  286          case F_SETLK:
 286  287          case F_SETLKW:
 287  288          case F_SETLK_NBMAND:
 288  289          case F_OFD_GETLK:
 289  290          case F_OFD_SETLK:
 290  291          case F_OFD_SETLKW:
 291  292          case F_FLOCK:
 292  293          case F_FLOCKW:
 293  294  
 294  295                  /*
 295  296                   * Copy in input fields only.
 296  297                   */
 297  298  
 298  299                  if (cmd == F_O_GETLK) {
 299  300                          if (datamodel != DATAMODEL_ILP32) {
 300  301                                  error = EINVAL;
 301  302                                  break;
 302  303                          }
 303  304  
 304  305                          if (copyin((void *)arg, &obf, sizeof (obf))) {
 305  306                                  error = EFAULT;
 306  307                                  break;
 307  308                          }
 308  309                          bf.l_type = obf.l_type;
 309  310                          bf.l_whence = obf.l_whence;
 310  311                          bf.l_start = (off64_t)obf.l_start;
 311  312                          bf.l_len = (off64_t)obf.l_len;
 312  313                          bf.l_sysid = (int)obf.l_sysid;
 313  314                          bf.l_pid = obf.l_pid;
 314  315                  } else if (datamodel == DATAMODEL_NATIVE) {
 315  316                          if (copyin((void *)arg, &sbf, sizeof (sbf))) {
 316  317                                  error = EFAULT;
 317  318                                  break;
 318  319                          }
 319  320                          /*
 320  321                           * XXX  In an LP64 kernel with an LP64 application
 321  322                           *      there's no need to do a structure copy here
 322  323                           *      struct flock == struct flock64. However,
 323  324                           *      we did it this way to avoid more conditional
 324  325                           *      compilation.
 325  326                           */
 326  327                          bf.l_type = sbf.l_type;
 327  328                          bf.l_whence = sbf.l_whence;
 328  329                          bf.l_start = (off64_t)sbf.l_start;
 329  330                          bf.l_len = (off64_t)sbf.l_len;
 330  331                          bf.l_sysid = sbf.l_sysid;
 331  332                          bf.l_pid = sbf.l_pid;
 332  333                  }
 333  334  #if defined(_SYSCALL32_IMPL)
 334  335                  else {
 335  336                          struct flock32 sbf32;
 336  337                          if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 337  338                                  error = EFAULT;
 338  339                                  break;
 339  340                          }
 340  341                          bf.l_type = sbf32.l_type;
 341  342                          bf.l_whence = sbf32.l_whence;
 342  343                          bf.l_start = (off64_t)sbf32.l_start;
 343  344                          bf.l_len = (off64_t)sbf32.l_len;
 344  345                          bf.l_sysid = sbf32.l_sysid;
 345  346                          bf.l_pid = sbf32.l_pid;
 346  347                  }
 347  348  #endif /* _SYSCALL32_IMPL */
 348  349  
 349  350                  /*
 350  351                   * 64-bit support: check for overflow for 32-bit lock ops
 351  352                   */
 352  353                  if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
 353  354                          break;
 354  355  
 355  356                  if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 356  357                          /* FLOCK* locking is always over the entire file. */
 357  358                          if (bf.l_whence != 0 || bf.l_start != 0 ||

↓ open down ↓

290 lines elided

↑ open up ↑

 358  359                              bf.l_len != 0) {
 359  360                                  error = EINVAL;
 360  361                                  break;
 361  362                          }
 362  363                          if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 363  364                                  error = EINVAL;
 364  365                                  break;
 365  366                          }
 366  367                  }
 367  368  
 368      -                if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
      369 +                if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
      370 +                    cmd == F_OFD_SETLKW) {
 369  371                          /*
 370  372                           * TBD OFD-style locking is currently limited to
 371  373                           * covering the entire file.
 372  374                           */
 373  375                          if (bf.l_whence != 0 || bf.l_start != 0 ||
 374  376                              bf.l_len != 0) {
 375  377                                  error = EINVAL;
 376  378                                  break;
 377  379                          }
 378  380                  }

 379  381  
 380  382                  /*
 381  383                   * Not all of the filesystems understand F_O_GETLK, and
 382  384                   * there's no need for them to know.  Map it to F_GETLK.
 383  385                   *
 384  386                   * The *_frlock functions in the various file systems basically
 385  387                   * do some validation and then funnel everything through the
 386  388                   * fs_frlock function. For OFD-style locks fs_frlock will do
 387  389                   * nothing so that once control returns here we can call the
 388  390                   * ofdlock function with the correct fp. For OFD-style locks
 389  391                   * the unsupported remote file systems, such as NFS, detect and
 390  392                   * reject the OFD-style cmd argument.
 391  393                   */
 392  394                  if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
 393  395                      &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
 394  396                          break;
 395  397  
 396  398                  if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 397  399                      cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 398  400                          /*
 399  401                           * This is an OFD-style lock so we need to handle it
 400  402                           * here. Because OFD-style locks are associated with
 401  403                           * the file_t we didn't have enough info down the
 402  404                           * VOP_FRLOCK path immediately above.
 403  405                           */
 404  406                          if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 405  407                                  break;
 406  408                  }
 407  409  
 408  410                  /*
 409  411                   * If command is GETLK and no lock is found, only
 410  412                   * the type field is changed.
 411  413                   */
 412  414                  if ((cmd == F_O_GETLK || cmd == F_GETLK ||
 413  415                      cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
 414  416                          /* l_type always first entry, always a short */
 415  417                          if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 416  418                              sizeof (bf.l_type)))
 417  419                                  error = EFAULT;
 418  420                          break;
 419  421                  }
 420  422  
 421  423                  if (cmd == F_O_GETLK) {
 422  424                          /*
 423  425                           * Return an SVR3 flock structure to the user.
 424  426                           */
 425  427                          obf.l_type = (int16_t)bf.l_type;
 426  428                          obf.l_whence = (int16_t)bf.l_whence;
 427  429                          obf.l_start = (int32_t)bf.l_start;
 428  430                          obf.l_len = (int32_t)bf.l_len;
 429  431                          if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
 430  432                                  /*
 431  433                                   * One or both values for the above fields
 432  434                                   * is too large to store in an SVR3 flock
 433  435                                   * structure.
 434  436                                   */
 435  437                                  error = EOVERFLOW;
 436  438                                  break;
 437  439                          }
 438  440                          obf.l_sysid = (int16_t)bf.l_sysid;
 439  441                          obf.l_pid = (int16_t)bf.l_pid;
 440  442                          if (copyout(&obf, (void *)arg, sizeof (obf)))
 441  443                                  error = EFAULT;
 442  444                  } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 443  445                          /*
 444  446                           * Copy out SVR4 flock.
 445  447                           */
 446  448                          int i;
 447  449  
 448  450                          if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
 449  451                                  error = EOVERFLOW;
 450  452                                  break;
 451  453                          }
 452  454  
 453  455                          if (datamodel == DATAMODEL_NATIVE) {
 454  456                                  for (i = 0; i < 4; i++)
 455  457                                          sbf.l_pad[i] = 0;
 456  458                                  /*
 457  459                                   * XXX  In an LP64 kernel with an LP64
 458  460                                   *      application there's no need to do a
 459  461                                   *      structure copy here as currently
 460  462                                   *      struct flock == struct flock64.
 461  463                                   *      We did it this way to avoid more
 462  464                                   *      conditional compilation.
 463  465                                   */
 464  466                                  sbf.l_type = bf.l_type;
 465  467                                  sbf.l_whence = bf.l_whence;
 466  468                                  sbf.l_start = (off_t)bf.l_start;
 467  469                                  sbf.l_len = (off_t)bf.l_len;
 468  470                                  sbf.l_sysid = bf.l_sysid;
 469  471                                  sbf.l_pid = bf.l_pid;
 470  472                                  if (copyout(&sbf, (void *)arg, sizeof (sbf)))
 471  473                                          error = EFAULT;
 472  474                          }
 473  475  #if defined(_SYSCALL32_IMPL)
 474  476                          else {
 475  477                                  struct flock32 sbf32;
 476  478                                  if (bf.l_start > MAXOFF32_T ||
 477  479                                      bf.l_len > MAXOFF32_T) {
 478  480                                          error = EOVERFLOW;
 479  481                                          break;
 480  482                                  }
 481  483                                  for (i = 0; i < 4; i++)
 482  484                                          sbf32.l_pad[i] = 0;
 483  485                                  sbf32.l_type = (int16_t)bf.l_type;
 484  486                                  sbf32.l_whence = (int16_t)bf.l_whence;
 485  487                                  sbf32.l_start = (off32_t)bf.l_start;
 486  488                                  sbf32.l_len = (off32_t)bf.l_len;
 487  489                                  sbf32.l_sysid = (int32_t)bf.l_sysid;
 488  490                                  sbf32.l_pid = (pid32_t)bf.l_pid;
 489  491                                  if (copyout(&sbf32,
 490  492                                      (void *)arg, sizeof (sbf32)))
 491  493                                          error = EFAULT;
 492  494                          }
 493  495  #endif
 494  496                  }
 495  497                  break;
 496  498  
 497  499          case F_CHKFL:
 498  500                  /*
 499  501                   * This is for internal use only, to allow the vnode layer
 500  502                   * to validate a flags setting before applying it.  User
 501  503                   * programs can't issue it.
 502  504                   */
 503  505                  error = EINVAL;
 504  506                  break;
 505  507  
 506  508          case F_ALLOCSP:
 507  509          case F_FREESP:
 508  510          case F_ALLOCSP64:
 509  511          case F_FREESP64:
 510  512                  /*
 511  513                   * Test for not-a-regular-file (and returning EINVAL)
 512  514                   * before testing for open-for-writing (and returning EBADF).
 513  515                   * This is relied upon by posix_fallocate() in libc.
 514  516                   */
 515  517                  if (vp->v_type != VREG) {
 516  518                          error = EINVAL;
 517  519                          break;
 518  520                  }
 519  521  
 520  522                  if ((flag & FWRITE) == 0) {
 521  523                          error = EBADF;
 522  524                          break;
 523  525                  }
 524  526  
 525  527                  if (datamodel != DATAMODEL_ILP32 &&
 526  528                      (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 527  529                          error = EINVAL;
 528  530                          break;
 529  531                  }
 530  532  
 531  533  #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
 532  534                  if (datamodel == DATAMODEL_ILP32 &&
 533  535                      (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 534  536                          struct flock32 sbf32;
 535  537                          /*
 536  538                           * For compatibility we overlay an SVR3 flock on an SVR4
 537  539                           * flock.  This works because the input field offsets
 538  540                           * in "struct flock" were preserved.
 539  541                           */
 540  542                          if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 541  543                                  error = EFAULT;
 542  544                                  break;
 543  545                          } else {
 544  546                                  bf.l_type = sbf32.l_type;
 545  547                                  bf.l_whence = sbf32.l_whence;
 546  548                                  bf.l_start = (off64_t)sbf32.l_start;
 547  549                                  bf.l_len = (off64_t)sbf32.l_len;
 548  550                                  bf.l_sysid = sbf32.l_sysid;
 549  551                                  bf.l_pid = sbf32.l_pid;
 550  552                          }
 551  553                  }
 552  554  #endif /* _ILP32 || _SYSCALL32_IMPL */
 553  555  
 554  556  #if defined(_LP64)
 555  557                  if (datamodel == DATAMODEL_LP64 &&
 556  558                      (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 557  559                          if (copyin((void *)arg, &bf, sizeof (bf))) {
 558  560                                  error = EFAULT;
 559  561                                  break;
 560  562                          }
 561  563                  }
 562  564  #endif /* defined(_LP64) */
 563  565  
 564  566  #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 565  567                  if (datamodel == DATAMODEL_ILP32 &&
 566  568                      (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 567  569                          if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 568  570                                  error = EFAULT;
 569  571                                  break;
 570  572                          } else {
 571  573                                  /*
 572  574                                   * Note that the size of flock64 is different in
 573  575                                   * the ILP32 and LP64 models, due to the l_pad
 574  576                                   * field. We do not want to assume that the
 575  577                                   * flock64 structure is laid out the same in
 576  578                                   * ILP32 and LP64 environments, so we will
 577  579                                   * copy in the ILP32 version of flock64
 578  580                                   * explicitly and copy it to the native
 579  581                                   * flock64 structure.
 580  582                                   */
 581  583                                  bf.l_type = (short)bf64_32.l_type;
 582  584                                  bf.l_whence = (short)bf64_32.l_whence;
 583  585                                  bf.l_start = bf64_32.l_start;
 584  586                                  bf.l_len = bf64_32.l_len;
 585  587                                  bf.l_sysid = (int)bf64_32.l_sysid;
 586  588                                  bf.l_pid = (pid_t)bf64_32.l_pid;
 587  589                          }
 588  590                  }
 589  591  #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 590  592  
 591  593                  if (cmd == F_ALLOCSP || cmd == F_FREESP)
 592  594                          error = flock_check(vp, &bf, offset, maxoffset);
 593  595                  else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
 594  596                          error = flock_check(vp, &bf, offset, MAXOFFSET_T);
 595  597                  if (error)
 596  598                          break;
 597  599  
 598  600                  if (vp->v_type == VREG && bf.l_len == 0 &&
 599  601                      bf.l_start > OFFSET_MAX(fp)) {
 600  602                          error = EFBIG;
 601  603                          break;
 602  604                  }
 603  605  
 604  606                  /*
 605  607                   * Make sure that there are no conflicting non-blocking
 606  608                   * mandatory locks in the region being manipulated. If
 607  609                   * there are such locks then return EACCES.
 608  610                   */
 609  611                  if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
 610  612                          break;
 611  613  
 612  614                  if (nbl_need_check(vp)) {
 613  615                          u_offset_t      begin;
 614  616                          ssize_t         length;
 615  617  
 616  618                          nbl_start_crit(vp, RW_READER);
 617  619                          in_crit = 1;
 618  620                          vattr.va_mask = AT_SIZE;
 619  621                          if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 620  622                              != 0)
 621  623                                  break;
 622  624                          begin = start > vattr.va_size ? vattr.va_size : start;
 623  625                          length = vattr.va_size > start ? vattr.va_size - start :
 624  626                              start - vattr.va_size;
 625  627                          if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
 626  628                              NULL)) {
 627  629                                  error = EACCES;
 628  630                                  break;
 629  631                          }
 630  632                  }
 631  633  
 632  634                  if (cmd == F_ALLOCSP64)
 633  635                          cmd = F_ALLOCSP;
 634  636                  else if (cmd == F_FREESP64)
 635  637                          cmd = F_FREESP;
 636  638  
 637  639                  error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
 638  640  
 639  641                  break;
 640  642  
 641  643  #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 642  644          case F_GETLK64:
 643  645          case F_SETLK64:
 644  646          case F_SETLKW64:
 645  647          case F_SETLK64_NBMAND:
 646  648          case F_OFD_GETLK64:
 647  649          case F_OFD_SETLK64:
 648  650          case F_OFD_SETLKW64:
 649  651          case F_FLOCK64:
 650  652          case F_FLOCKW64:
 651  653                  /*
 652  654                   * Large Files: Here we set cmd as *LK and send it to
 653  655                   * lower layers. *LK64 is only for the user land.
 654  656                   * Most of the comments described above for F_SETLK
 655  657                   * apply here too.
 656  658                   * Large File support is only needed for ILP32 apps!
 657  659                   */
 658  660                  if (datamodel != DATAMODEL_ILP32) {
 659  661                          error = EINVAL;
 660  662                          break;
 661  663                  }
 662  664  
 663  665                  if (cmd == F_GETLK64)
 664  666                          cmd = F_GETLK;
 665  667                  else if (cmd == F_SETLK64)
 666  668                          cmd = F_SETLK;
 667  669                  else if (cmd == F_SETLKW64)
 668  670                          cmd = F_SETLKW;
 669  671                  else if (cmd == F_SETLK64_NBMAND)
 670  672                          cmd = F_SETLK_NBMAND;
 671  673                  else if (cmd == F_OFD_GETLK64)
 672  674                          cmd = F_OFD_GETLK;
 673  675                  else if (cmd == F_OFD_SETLK64)
 674  676                          cmd = F_OFD_SETLK;
 675  677                  else if (cmd == F_OFD_SETLKW64)
 676  678                          cmd = F_OFD_SETLKW;
 677  679                  else if (cmd == F_FLOCK64)
 678  680                          cmd = F_FLOCK;
 679  681                  else if (cmd == F_FLOCKW64)
 680  682                          cmd = F_FLOCKW;
 681  683  
 682  684                  /*
 683  685                   * Note that the size of flock64 is different in the ILP32
 684  686                   * and LP64 models, due to the l_pad field.
 685  687                   * We do not want to assume that the flock64 structure is
 686  688                   * laid out the same in ILP32 and LP64 environments, so
 687  689                   * we will copy in the ILP32 version of flock64 explicitly
 688  690                   * and copy it to the native flock64 structure.
 689  691                   */
 690  692  
 691  693                  if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 692  694                          error = EFAULT;
 693  695                          break;
 694  696                  }
 695  697  
 696  698                  bf.l_type = (short)bf64_32.l_type;
 697  699                  bf.l_whence = (short)bf64_32.l_whence;
 698  700                  bf.l_start = bf64_32.l_start;
 699  701                  bf.l_len = bf64_32.l_len;
 700  702                  bf.l_sysid = (int)bf64_32.l_sysid;
 701  703                  bf.l_pid = (pid_t)bf64_32.l_pid;
 702  704  
 703  705                  if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
 704  706                          break;
 705  707  
 706  708                  if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 707  709                          /* FLOCK* locking is always over the entire file. */
 708  710                          if (bf.l_whence != 0 || bf.l_start != 0 ||
 709  711                              bf.l_len != 0) {
 710  712                                  error = EINVAL;
 711  713                                  break;
 712  714                          }
 713  715                          if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 714  716                                  error = EINVAL;
 715  717                                  break;
 716  718                          }
 717  719                  }
 718  720  
 719  721                  if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 720  722                          /*
 721  723                           * TBD OFD-style locking is currently limited to
 722  724                           * covering the entire file.
 723  725                           */
 724  726                          if (bf.l_whence != 0 || bf.l_start != 0 ||
 725  727                              bf.l_len != 0) {
 726  728                                  error = EINVAL;
 727  729                                  break;
 728  730                          }
 729  731                  }
 730  732  
 731  733                  /*
 732  734                   * The *_frlock functions in the various file systems basically
 733  735                   * do some validation and then funnel everything through the
 734  736                   * fs_frlock function. For OFD-style locks fs_frlock will do
 735  737                   * nothing so that once control returns here we can call the
 736  738                   * ofdlock function with the correct fp. For OFD-style locks
 737  739                   * the unsupported remote file systems, such as NFS, detect and
 738  740                   * reject the OFD-style cmd argument.
 739  741                   */
 740  742                  if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
 741  743                      NULL, fp->f_cred, NULL)) != 0)
 742  744                          break;
 743  745  
 744  746                  if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 745  747                      cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 746  748                          /*
 747  749                           * This is an OFD-style lock so we need to handle it
 748  750                           * here. Because OFD-style locks are associated with
 749  751                           * the file_t we didn't have enough info down the
 750  752                           * VOP_FRLOCK path immediately above.
 751  753                           */
 752  754                          if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 753  755                                  break;
 754  756                  }
 755  757  
 756  758                  if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
 757  759                      bf.l_type == F_UNLCK) {
 758  760                          if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 759  761                              sizeof (bf.l_type)))
 760  762                                  error = EFAULT;
 761  763                          break;
 762  764                  }
 763  765  
 764  766                  if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 765  767                          int i;
 766  768  
 767  769                          /*
 768  770                           * We do not want to assume that the flock64 structure
 769  771                           * is laid out the same in ILP32 and LP64
 770  772                           * environments, so we will copy out the ILP32 version
 771  773                           * of flock64 explicitly after copying the native
 772  774                           * flock64 structure to it.
 773  775                           */
 774  776                          for (i = 0; i < 4; i++)
 775  777                                  bf64_32.l_pad[i] = 0;
 776  778                          bf64_32.l_type = (int16_t)bf.l_type;
 777  779                          bf64_32.l_whence = (int16_t)bf.l_whence;
 778  780                          bf64_32.l_start = bf.l_start;
 779  781                          bf64_32.l_len = bf.l_len;
 780  782                          bf64_32.l_sysid = (int32_t)bf.l_sysid;
 781  783                          bf64_32.l_pid = (pid32_t)bf.l_pid;
 782  784                          if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
 783  785                                  error = EFAULT;
 784  786                  }
 785  787                  break;
 786  788  #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 787  789  
 788  790          case F_SHARE:
 789  791          case F_SHARE_NBMAND:
 790  792          case F_UNSHARE:
 791  793  
 792  794                  /*
 793  795                   * Copy in input fields only.
 794  796                   */
 795  797                  if (copyin((void *)arg, &fsh, sizeof (fsh))) {
 796  798                          error = EFAULT;
 797  799                          break;
 798  800                  }
 799  801  
 800  802                  /*
 801  803                   * Local share reservations always have this simple form
 802  804                   */
 803  805                  shr.s_access = fsh.f_access;
 804  806                  shr.s_deny = fsh.f_deny;
 805  807                  shr.s_sysid = 0;
 806  808                  shr.s_pid = ttoproc(curthread)->p_pid;
 807  809                  shr_own.sl_pid = shr.s_pid;
 808  810                  shr_own.sl_id = fsh.f_id;
 809  811                  shr.s_own_len = sizeof (shr_own);
 810  812                  shr.s_owner = (caddr_t)&shr_own;
 811  813                  error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
 812  814                  break;
 813  815  
 814  816          default:
 815  817                  error = EINVAL;
 816  818                  break;
 817  819          }
 818  820  
 819  821          if (in_crit)
 820  822                  nbl_end_crit(vp);
 821  823  
 822  824  done:
 823  825          releasef(fdes);
 824  826  out:
 825  827          if (error)
 826  828                  return (set_errno(error));
 827  829          return (retval);
 828  830  }
 829  831  
 830  832  int
 831  833  flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
 832  834  {
 833  835          struct vattr    vattr;
 834  836          int     error;
 835  837          u_offset_t start, end;
 836  838  
 837  839          /*
 838  840           * Determine the starting point of the request
 839  841           */
 840  842          switch (flp->l_whence) {
 841  843          case 0:         /* SEEK_SET */
 842  844                  start = (u_offset_t)flp->l_start;
 843  845                  if (start > max)
 844  846                          return (EINVAL);
 845  847                  break;
 846  848          case 1:         /* SEEK_CUR */
 847  849                  if (flp->l_start > (max - offset))
 848  850                          return (EOVERFLOW);
 849  851                  start = (u_offset_t)(flp->l_start + offset);
 850  852                  if (start > max)
 851  853                          return (EINVAL);
 852  854                  break;
 853  855          case 2:         /* SEEK_END */
 854  856                  vattr.va_mask = AT_SIZE;
 855  857                  if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 856  858                          return (error);
 857  859                  if (flp->l_start > (max - (offset_t)vattr.va_size))
 858  860                          return (EOVERFLOW);
 859  861                  start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 860  862                  if (start > max)
 861  863                          return (EINVAL);
 862  864                  break;
 863  865          default:
 864  866                  return (EINVAL);
 865  867          }
 866  868  
 867  869          /*
 868  870           * Determine the range covered by the request.
 869  871           */
 870  872          if (flp->l_len == 0)
 871  873                  end = MAXEND;
 872  874          else if ((offset_t)flp->l_len > 0) {
 873  875                  if (flp->l_len > (max - start + 1))
 874  876                          return (EOVERFLOW);
 875  877                  end = (u_offset_t)(start + (flp->l_len - 1));
 876  878                  ASSERT(end <= max);
 877  879          } else {
 878  880                  /*
 879  881                   * Negative length; why do we even allow this ?
 880  882                   * Because this allows easy specification of
 881  883                   * the last n bytes of the file.
 882  884                   */
 883  885                  end = start;
 884  886                  start += (u_offset_t)flp->l_len;
 885  887                  (start)++;
 886  888                  if (start > max)
 887  889                          return (EINVAL);
 888  890                  ASSERT(end <= max);
 889  891          }
 890  892          ASSERT(start <= max);
 891  893          if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
 892  894              end == (offset_t)max) {
 893  895                  flp->l_len = 0;
 894  896          }
 895  897          if (start  > end)
 896  898                  return (EINVAL);
 897  899          return (0);
 898  900  }
 899  901  
 900  902  static int
 901  903  flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
 902  904  {
 903  905          struct vattr    vattr;
 904  906          int     error;
 905  907  
 906  908          /*
 907  909           * Determine the starting point of the request. Assume that it is
 908  910           * a valid starting point.
 909  911           */
 910  912          switch (flp->l_whence) {
 911  913          case 0:         /* SEEK_SET */
 912  914                  *start = (u_offset_t)flp->l_start;
 913  915                  break;
 914  916          case 1:         /* SEEK_CUR */
 915  917                  *start = (u_offset_t)(flp->l_start + offset);
 916  918                  break;
 917  919          case 2:         /* SEEK_END */
 918  920                  vattr.va_mask = AT_SIZE;
 919  921                  if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 920  922                          return (error);
 921  923                  *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 922  924                  break;
 923  925          default:
 924  926                  return (EINVAL);
 925  927          }
 926  928  
 927  929          return (0);
 928  930  }
 929  931  
 930  932  /*
 931  933   * Take rctl action when the requested file descriptor is too big.
 932  934   */
 933  935  static void
 934  936  fd_too_big(proc_t *p)
 935  937  {
 936  938          mutex_enter(&p->p_lock);
 937  939          (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
 938  940              p->p_rctls, p, RCA_SAFE);
 939  941          mutex_exit(&p->p_lock);
 940  942  }

↓ open down ↓

562 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX