io-lx-public Wdiff usr/src/uts/common/fs/udfs/udf_vnops.c

Print this page

OS-5148 ftruncate at offset should emit proper events
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/udfs/udf_vnops.c
          +++ new/usr/src/uts/common/fs/udfs/udf_vnops.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright 2015, Joyent, Inc.
  28   28   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/t_lock.h>
  32   32  #include <sys/param.h>
  33   33  #include <sys/time.h>
  34   34  #include <sys/systm.h>
  35   35  #include <sys/sysmacros.h>
  36   36  #include <sys/resource.h>
  37   37  #include <sys/signal.h>
  38   38  #include <sys/cred.h>
  39   39  #include <sys/user.h>
  40   40  #include <sys/buf.h>
  41   41  #include <sys/vfs.h>
  42   42  #include <sys/vfs_opreg.h>
  43   43  #include <sys/stat.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/mode.h>
  46   46  #include <sys/proc.h>
  47   47  #include <sys/disp.h>
  48   48  #include <sys/file.h>
  49   49  #include <sys/fcntl.h>
  50   50  #include <sys/flock.h>
  51   51  #include <sys/kmem.h>
  52   52  #include <sys/uio.h>
  53   53  #include <sys/dnlc.h>
  54   54  #include <sys/conf.h>
  55   55  #include <sys/errno.h>
  56   56  #include <sys/mman.h>
  57   57  #include <sys/fbuf.h>
  58   58  #include <sys/pathname.h>
  59   59  #include <sys/debug.h>
  60   60  #include <sys/vmsystm.h>
  61   61  #include <sys/cmn_err.h>
  62   62  #include <sys/dirent.h>
  63   63  #include <sys/errno.h>
  64   64  #include <sys/modctl.h>
  65   65  #include <sys/statvfs.h>
  66   66  #include <sys/mount.h>
  67   67  #include <sys/sunddi.h>
  68   68  #include <sys/bootconf.h>
  69   69  #include <sys/policy.h>
  70   70  
  71   71  #include <vm/hat.h>
  72   72  #include <vm/page.h>
  73   73  #include <vm/pvn.h>
  74   74  #include <vm/as.h>
  75   75  #include <vm/seg.h>
  76   76  #include <vm/seg_map.h>
  77   77  #include <vm/seg_kmem.h>
  78   78  #include <vm/seg_vn.h>
  79   79  #include <vm/rm.h>
  80   80  #include <vm/page.h>
  81   81  #include <sys/swap.h>
  82   82  
  83   83  #include <fs/fs_subr.h>
  84   84  
  85   85  #include <sys/fs/udf_volume.h>
  86   86  #include <sys/fs/udf_inode.h>
  87   87  
  88   88  static int32_t udf_open(struct vnode **,
  89   89          int32_t, struct cred *, caller_context_t *);
  90   90  static int32_t udf_close(struct vnode *,
  91   91          int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
  92   92  static int32_t udf_read(struct vnode *,
  93   93          struct uio *, int32_t, struct cred *, caller_context_t *);
  94   94  static int32_t udf_write(struct vnode *,
  95   95          struct uio *, int32_t, struct cred *, caller_context_t *);
  96   96  static int32_t udf_ioctl(struct vnode *,
  97   97          int32_t, intptr_t, int32_t, struct cred *, int32_t *,
  98   98          caller_context_t *);
  99   99  static int32_t udf_getattr(struct vnode *,
 100  100          struct vattr *, int32_t, struct cred *, caller_context_t *);
 101  101  static int32_t udf_setattr(struct vnode *,
 102  102          struct vattr *, int32_t, struct cred *, caller_context_t *);
 103  103  static int32_t udf_access(struct vnode *,
 104  104          int32_t, int32_t, struct cred *, caller_context_t *);
 105  105  static int32_t udf_lookup(struct vnode *,
 106  106          char *, struct vnode **, struct pathname *,
 107  107          int32_t, struct vnode *, struct cred *,
 108  108          caller_context_t *, int *, pathname_t *);
 109  109  static int32_t udf_create(struct vnode *,
 110  110          char *, struct vattr *, enum vcexcl,
 111  111          int32_t, struct vnode **, struct cred *, int32_t,
 112  112          caller_context_t *, vsecattr_t *);
 113  113  static int32_t udf_remove(struct vnode *,
 114  114          char *, struct cred *, caller_context_t *, int);
 115  115  static int32_t udf_link(struct vnode *,
 116  116          struct vnode *, char *, struct cred *, caller_context_t *, int);
 117  117  static int32_t udf_rename(struct vnode *,
 118  118          char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
 119  119  static int32_t udf_mkdir(struct vnode *,
 120  120          char *, struct vattr *, struct vnode **, struct cred *,
 121  121          caller_context_t *, int, vsecattr_t *);
 122  122  static int32_t udf_rmdir(struct vnode *,
 123  123          char *, struct vnode *, struct cred *, caller_context_t *, int);
 124  124  static int32_t udf_readdir(struct vnode *,
 125  125          struct uio *, struct cred *, int32_t *, caller_context_t *, int);
 126  126  static int32_t udf_symlink(struct vnode *,
 127  127          char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
 128  128  static int32_t udf_readlink(struct vnode *,
 129  129          struct uio *, struct cred *, caller_context_t *);
 130  130  static int32_t udf_fsync(struct vnode *,
 131  131          int32_t, struct cred *, caller_context_t *);
 132  132  static void udf_inactive(struct vnode *,
 133  133          struct cred *, caller_context_t *);
 134  134  static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
 135  135  static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
 136  136  static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
 137  137  static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
 138  138          caller_context_t *);
 139  139  static int32_t udf_frlock(struct vnode *, int32_t,
 140  140          struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
 141  141          caller_context_t *);
 142  142  static int32_t udf_space(struct vnode *, int32_t,
 143  143          struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
 144  144  static int32_t udf_getpage(struct vnode *, offset_t,
 145  145          size_t, uint32_t *, struct page **, size_t,
 146  146          struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
 147  147  static int32_t udf_putpage(struct vnode *, offset_t,
 148  148          size_t, int32_t, struct cred *, caller_context_t *);
 149  149  static int32_t udf_map(struct vnode *, offset_t, struct as *,
 150  150          caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 151  151          caller_context_t *);
 152  152  static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
 153  153          caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 154  154          caller_context_t *);
 155  155  static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
 156  156          caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
 157  157          caller_context_t *);
 158  158  static int32_t udf_l_pathconf(struct vnode *, int32_t,
 159  159          ulong_t *, struct cred *, caller_context_t *);
 160  160  static int32_t udf_pageio(struct vnode *, struct page *,
 161  161          u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
 162  162  
 163  163  int32_t ud_getpage_miss(struct vnode *, u_offset_t,
 164  164          size_t, struct seg *, caddr_t, page_t *pl[],
 165  165          size_t, enum seg_rw, int32_t);
 166  166  void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
 167  167  int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
 168  168  int32_t ud_page_fill(struct ud_inode *, page_t *,
 169  169          u_offset_t, uint32_t, u_offset_t *);
 170  170  int32_t ud_iodone(struct buf *);
 171  171  int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 172  172  int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 173  173  int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
 174  174  int32_t ud_slave_done(struct buf *);
 175  175  
 176  176  /*
 177  177   * Structures to control multiple IO operations to get or put pages
 178  178   * that are backed by discontiguous blocks. The master struct is
 179  179   * a dummy that holds the original bp from pageio_setup. The
 180  180   * slave struct holds the working bp's to do the actual IO. Once
 181  181   * all the slave IOs complete, the master is processed as if a single
 182  182   * IO op has completed.
 183  183   */
           /*
            * NOTE(review): master_index is not referenced in the visible part of
            * this file; presumably it seeds mm_index for debugging -- confirm.
            */
 184  184  uint32_t master_index = 0;
 185  185  typedef struct mio_master {
 186  186          kmutex_t        mm_mutex;       /* protect the fields below */
 187  187          int32_t         mm_size;        /* NOTE(review): meaning not shown here -- confirm */
 188  188          buf_t           *mm_bp;         /* original bp */
 189  189          int32_t         mm_resid;       /* bytes remaining to transfer */
 190  190          int32_t         mm_error;       /* accumulated error from slaves */
 191  191          int32_t         mm_index;       /* XXX debugging */
 192  192  } mio_master_t;
 193  193  
           /* One of these exists per working bp; ms_ptr links it to its master. */
 194  194  typedef struct mio_slave {
 195  195          buf_t           ms_buf;         /* working buffer for this IO chunk */
 196  196          mio_master_t    *ms_ptr;        /* pointer to master */
 197  197  } mio_slave_t;
 198  198  
 199  199  struct vnodeops *udf_vnodeops;  /* NOTE(review): presumably built from the template below -- confirm */
 200  200  
           /*
            * Table pairing each vnode operation name with the UDF routine in this
            * file that implements it.  VOPNAME_VNEVENT is the one entry that does
            * not use a udf_* routine: it points at fs_vnevent_support.  The last
            * entry is a NULL/NULL pair.
            */
 201  201  const fs_operation_def_t udf_vnodeops_template[] = {
 202  202          VOPNAME_OPEN,           { .vop_open = udf_open },
 203  203          VOPNAME_CLOSE,          { .vop_close = udf_close },
 204  204          VOPNAME_READ,           { .vop_read = udf_read },
 205  205          VOPNAME_WRITE,          { .vop_write = udf_write },
 206  206          VOPNAME_IOCTL,          { .vop_ioctl = udf_ioctl },
 207  207          VOPNAME_GETATTR,        { .vop_getattr = udf_getattr },
 208  208          VOPNAME_SETATTR,        { .vop_setattr = udf_setattr },
 209  209          VOPNAME_ACCESS,         { .vop_access = udf_access },
 210  210          VOPNAME_LOOKUP,         { .vop_lookup = udf_lookup },
 211  211          VOPNAME_CREATE,         { .vop_create = udf_create },
 212  212          VOPNAME_REMOVE,         { .vop_remove = udf_remove },
 213  213          VOPNAME_LINK,           { .vop_link = udf_link },
 214  214          VOPNAME_RENAME,         { .vop_rename = udf_rename },
 215  215          VOPNAME_MKDIR,          { .vop_mkdir = udf_mkdir },
 216  216          VOPNAME_RMDIR,          { .vop_rmdir = udf_rmdir },
 217  217          VOPNAME_READDIR,        { .vop_readdir = udf_readdir },
 218  218          VOPNAME_SYMLINK,        { .vop_symlink = udf_symlink },
 219  219          VOPNAME_READLINK,       { .vop_readlink = udf_readlink },
 220  220          VOPNAME_FSYNC,          { .vop_fsync = udf_fsync },
 221  221          VOPNAME_INACTIVE,       { .vop_inactive = udf_inactive },
 222  222          VOPNAME_FID,            { .vop_fid = udf_fid },
 223  223          VOPNAME_RWLOCK,         { .vop_rwlock = udf_rwlock },
 224  224          VOPNAME_RWUNLOCK,       { .vop_rwunlock = udf_rwunlock },
 225  225          VOPNAME_SEEK,           { .vop_seek = udf_seek },
 226  226          VOPNAME_FRLOCK,         { .vop_frlock = udf_frlock },
 227  227          VOPNAME_SPACE,          { .vop_space = udf_space },
 228  228          VOPNAME_GETPAGE,        { .vop_getpage = udf_getpage },
 229  229          VOPNAME_PUTPAGE,        { .vop_putpage = udf_putpage },
 230  230          VOPNAME_MAP,            { .vop_map = udf_map },
 231  231          VOPNAME_ADDMAP,         { .vop_addmap = udf_addmap },
 232  232          VOPNAME_DELMAP,         { .vop_delmap = udf_delmap },
 233  233          VOPNAME_PATHCONF,       { .vop_pathconf = udf_l_pathconf },
 234  234          VOPNAME_PAGEIO,         { .vop_pageio = udf_pageio },
 235  235          VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 236  236          NULL,                   NULL
 237  237  };
 238  238  
 239  239  /* ARGSUSED */
           /*
            * Open entry point (installed as .vop_open in udf_vnodeops_template).
            *
            * No per-open work is done: the routine calls ud_printf() with its own
            * name and always returns 0.  None of the arguments are examined.
            */
 240  240  static int32_t
 241  241  udf_open(
 242  242          struct vnode **vpp,
 243  243          int32_t flag,
 244  244          struct cred *cr,
 245  245          caller_context_t *ct)
 246  246  {
 247  247          ud_printf("udf_open\n");
 248  248  
 249  249          return (0);
 250  250  }
 251  251  
 252  252  /* ARGSUSED */
           /*
            * Close entry point (installed as .vop_close).
            *
            * Runs ITIMES() on the inode, then calls cleanlocks() and cleanshares()
            * for the current process's pid.  If the vnode's hold count is at most 2
            * and the vnode is not VBAD, a pending partially filled cluster
            * (i_delayoff/i_delaylen) is pushed with ud_putpages() using
            * B_ASYNC | B_FREE, and i_delaylen is reset to 0.
            *
            * The result of ud_putpages() is deliberately discarded; the routine
            * always returns 0.  flag, count, offset and ct are not used.
            */
 253  253  static int32_t
 254  254  udf_close(
 255  255          struct vnode *vp,
 256  256          int32_t flag,
 257  257          int32_t count,
 258  258          offset_t offset,
 259  259          struct cred *cr,
 260  260          caller_context_t *ct)
 261  261  {
 262  262          struct ud_inode *ip = VTOI(vp);
 263  263  
 264  264          ud_printf("udf_close\n");
 265  265  
 266  266          ITIMES(ip);
 267  267  
 268  268          cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 269  269          cleanshares(vp, ttoproc(curthread)->p_pid);
 270  270  
 271  271          /*
 272  272           * Push partially filled cluster at last close.
 273  273           * ``last close'' is approximated because the dnlc
 274  274           * may have a hold on the vnode.
 275  275           */
 276  276          if (vp->v_count <= 2 && vp->v_type != VBAD) {
                           /*
                            * NOTE(review): this declaration shadows the outer ip.
                            * Both are VTOI(vp), so the inner one is redundant.
                            */
 277  277                  struct ud_inode *ip = VTOI(vp);
 278  278                  if (ip->i_delaylen) {
 279  279                          (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
 280  280                              B_ASYNC | B_FREE, cr);
 281  281                          ip->i_delaylen = 0;
 282  282                  }
 283  283          }
 284  284  
 285  285          return (0);
 286  286  }
 287  287  
 288  288  /* ARGSUSED */
           /*
            * Read entry point (installed as .vop_read).
            *
            * The caller is expected to hold ip->i_rwlock as a reader already; this
            * is asserted below.  If MANDLOCK() is true for the file, chklock() is
            * consulted first and any error it reports is returned without reading.
            * Otherwise the transfer is done by ud_rdip() with ip->i_contents held
            * as a reader.
            *
            * Returns 0 or the error from chklock() or ud_rdip().
            */
 289  289  static int32_t
 290  290  udf_read(
 291  291          struct vnode *vp,
 292  292          struct uio *uiop,
 293  293          int32_t ioflag,
 294  294          struct cred *cr,
 295  295          caller_context_t *ct)
 296  296  {
 297  297          struct ud_inode *ip = VTOI(vp);
 298  298          int32_t error;
 299  299  
 300  300          ud_printf("udf_read\n");
 301  301  
                   /*
                    * Compiled only when __lock_lint is defined.
                    * NOTE(review): presumably this models, for the lock checker, the
                    * acquisition normally done by the caller -- confirm.
                    */
 302  302  #ifdef  __lock_lint
 303  303          rw_enter(&ip->i_rwlock, RW_READER);
 304  304  #endif
 305  305  
 306  306          ASSERT(RW_READ_HELD(&ip->i_rwlock));
 307  307  
 308  308          if (MANDLOCK(vp, ip->i_char)) {
 309  309                  /*
 310  310                   * udf_getattr ends up being called by chklock
 311  311                   */
 312  312                  error = chklock(vp, FREAD, uiop->uio_loffset,
 313  313                      uiop->uio_resid, uiop->uio_fmode, ct);
 314  314                  if (error) {
 315  315                          goto end;
 316  316                  }
 317  317          }
 318  318  
 319  319          rw_enter(&ip->i_contents, RW_READER);
 320  320          error = ud_rdip(ip, uiop, ioflag, cr);
 321  321          rw_exit(&ip->i_contents);
 322  322  
 323  323  end:
 324  324  #ifdef  __lock_lint
 325  325          rw_exit(&ip->i_rwlock);
 326  326  #endif
 327  327  
 328  328          return (error);
 329  329  }
 330  330  
 331  331  
 332  332  int32_t ud_WRITES = 1;          /* nonzero enables write throttling in udf_write */
 333  333  int32_t ud_HW = 96 * 1024;      /* writers wait while ip->i_writes exceeds this */
 334  334  int32_t ud_LW = 64 * 1024;      /* NOTE(review): unused in this view; presumably the low-water mark -- confirm */
 335  335  int32_t ud_throttles = 0;       /* number of cv_wait() calls made while throttling */
 336  336  
 337  337  /* ARGSUSED */
           /*
            * Write entry point (installed as .vop_write).
            *
            * The caller is expected to hold ip->i_rwlock as a writer already; this
            * is asserted below.  The steps are:
            *   1. If MANDLOCK() is true for the file, chklock() is consulted and
            *      any error is returned without writing.
            *   2. If ud_WRITES is nonzero, the thread waits on ip->i_wrcv until
            *      ip->i_writes is no longer above ud_HW.
            *   3. With ip->i_contents held as a writer, the offset is moved to
            *      ip->i_size for FAPPEND writes to regular files, and ud_wrip()
            *      does the transfer.
            *
            * Returns 0 or the error from chklock() or ud_wrip().
            */
 338  338  static int32_t
 339  339  udf_write(
 340  340          struct vnode *vp,
 341  341          struct uio *uiop,
 342  342          int32_t ioflag,
 343  343          struct cred *cr,
 344  344          caller_context_t *ct)
 345  345  {
 346  346          struct ud_inode *ip = VTOI(vp);
 347  347          int32_t error = 0;
 348  348  
 349  349          ud_printf("udf_write\n");
 350  350  
 351  351  #ifdef  __lock_lint
 352  352          rw_enter(&ip->i_rwlock, RW_WRITER);
 353  353  #endif
 354  354  
 355  355          ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
 356  356  
 357  357          if (MANDLOCK(vp, ip->i_char)) {
 358  358                  /*
 359  359                   * udf_getattr ends up being called by chklock
 360  360                   */
 361  361                  error = chklock(vp, FWRITE, uiop->uio_loffset,
 362  362                      uiop->uio_resid, uiop->uio_fmode, ct);
 363  363                  if (error) {
 364  364                          goto end;
 365  365                  }
 366  366          }
 367  367          /*
 368  368           * Throttle writes.
                    *
                    * NOTE(review): ud_throttles is a file-wide counter but is
                    * incremented under the per-inode i_tlock, so concurrent writers
                    * on different inodes can race on it; it looks like a statistic
                    * only -- confirm.
 369  369           */
 370  370          mutex_enter(&ip->i_tlock);
 371  371          if (ud_WRITES && (ip->i_writes > ud_HW)) {
 372  372                  while (ip->i_writes > ud_HW) {
 373  373                          ud_throttles++;
 374  374                          cv_wait(&ip->i_wrcv, &ip->i_tlock);
 375  375                  }
 376  376          }
 377  377          mutex_exit(&ip->i_tlock);
 378  378  
 379  379          /*
 380  380           * Write to the file
 381  381           */
 382  382          rw_enter(&ip->i_contents, RW_WRITER);
 383  383          if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
 384  384                  /*
 385  385                   * In append mode start at end of file.
 386  386                   */
 387  387                  uiop->uio_loffset = ip->i_size;
 388  388          }
 389  389          error = ud_wrip(ip, uiop, ioflag, cr);
 390  390          rw_exit(&ip->i_contents);
 391  391  
 392  392  end:
 393  393  #ifdef  __lock_lint
 394  394          rw_exit(&ip->i_rwlock);
 395  395  #endif
 396  396  
 397  397          return (error);
 398  398  }
 399  399  
 400  400  /* ARGSUSED */
           /*
            * Ioctl entry point (installed as .vop_ioctl).
            *
            * No ioctl commands are implemented: every call returns ENOTTY and none
            * of the arguments are examined.
            */
 401  401  static int32_t
 402  402  udf_ioctl(
 403  403          struct vnode *vp,
 404  404          int32_t cmd,
 405  405          intptr_t arg,
 406  406          int32_t flag,
 407  407          struct cred *cr,
 408  408          int32_t *rvalp,
 409  409          caller_context_t *ct)
 410  410  {
 411  411          return (ENOTTY);
 412  412  }
 413  413  
 414  414  /* ARGSUSED */
           /*
            * Get-attributes entry point (installed as .vop_getattr).
            *
            * When the request mask is exactly AT_SIZE, only va_size is filled in
            * and i_contents is not taken.  Otherwise every field set below is
            * filled in regardless of the mask, with i_contents held as a reader;
            * the timestamps are copied under i_tlock after ITIMES_NOLOCK().
            *
            * Always returns 0.  flags, cr and ct are not used.
            */
 415  415  static int32_t
 416  416  udf_getattr(
 417  417          struct vnode *vp,
 418  418          struct vattr *vap,
 419  419          int32_t flags,
 420  420          struct cred *cr,
 421  421          caller_context_t *ct)
 422  422  {
 423  423          struct ud_inode *ip = VTOI(vp);
 424  424  
 425  425          ud_printf("udf_getattr\n");
 426  426  
 427  427          if (vap->va_mask == AT_SIZE) {
 428  428                  /*
 429  429                   * for performance, if only the size is requested don't bother
 430  430                   * with anything else.
 431  431                   */
 432  432                  vap->va_size = ip->i_size;
 433  433                  return (0);
 434  434          }
 435  435  
 436  436          rw_enter(&ip->i_contents, RW_READER);
 437  437  
 438  438          vap->va_type = vp->v_type;
 439  439          vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 440  440  
 441  441          vap->va_uid = ip->i_uid;
 442  442          vap->va_gid = ip->i_gid;
 443  443          vap->va_fsid = ip->i_dev;
 444  444          vap->va_nodeid = ip->i_icb_lbano;
 445  445          vap->va_nlink = ip->i_nlink;
 446  446          vap->va_size = ip->i_size;
 447  447          vap->va_seq = ip->i_seq;
                   /* Only device nodes report a device number; all others get 0. */
 448  448          if (vp->v_type == VCHR || vp->v_type == VBLK) {
 449  449                  vap->va_rdev = ip->i_rdev;
 450  450          } else {
 451  451                  vap->va_rdev = 0;
 452  452          }
 453  453  
 454  454          mutex_enter(&ip->i_tlock);
 455  455          ITIMES_NOLOCK(ip);      /* mark correct time in inode */
 456  456          vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
 457  457          vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
 458  458          vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
 459  459          vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
 460  460          vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
 461  461          vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
 462  462          mutex_exit(&ip->i_tlock);
 463  463  
                   /*
                    * Block size: MAXBSIZE for block and character devices, the
                    * file system's udf_lbsize for everything else.  Note this
                    * switches on ip->i_type whereas the va_rdev test above uses
                    * vp->v_type.
                    */
 464  464          switch (ip->i_type) {
 465  465                  case VBLK:
 466  466                          vap->va_blksize = MAXBSIZE;
 467  467                          break;
 468  468                  case VCHR:
 469  469                          vap->va_blksize = MAXBSIZE;
 470  470                          break;
 471  471                  default:
 472  472                          vap->va_blksize = ip->i_udf->udf_lbsize;
 473  473                          break;
 474  474          }
                   /*
                    * NOTE(review): presumably converts a count of logical blocks
                    * (i_lbr) into device-sized blocks via the shift -- confirm.
                    */
 475  475          vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
 476  476  
 477  477          rw_exit(&ip->i_contents);
 478  478  
 479  479          return (0);
 480  480  }
 481  481  
 482  482  static int
 483  483  ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
 484  484  {
                   /*
                    * Adapter handed to secpolicy_vnode_setattr() by udf_setattr():
                    * converts mode with UD_UPERM2DPERM() and forwards to
                    * ud_iaccess() with a final argument of 0.
                    */
 485  485          return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
 486  486  }
 487  487  
 488  488  /*ARGSUSED4*/
           /*
            * Set-attributes entry point (installed as .vop_setattr).
            *
            * Inodes whose i_astrat is STRAT_TYPE4096, and requests containing any
            * AT_NOSET bit, are rejected with EINVAL before any lock is taken.
            * With i_rwlock and i_contents both held as writer, the routine asks
            * secpolicy_vnode_setattr() for permission (ud_iaccess_vmode is the
            * access callback) and then applies, in order: mode, uid/gid, size
            * (via ud_itrunc()) and atime/mtime.
            *
            * Every exit after the locks are taken passes through update_inode,
            * which calls ud_iupdat() when the thread has T_DONTPEND set and
            * ITIMES_NOLOCK() otherwise, then drops both locks.
            *
            * Returns 0 or the first error encountered.
            */
 489  489  static int32_t
 490  490  udf_setattr(
 491  491          struct vnode *vp,
 492  492          struct vattr *vap,
 493  493          int32_t flags,
 494  494          struct cred *cr,
 495  495          caller_context_t *ct)
 496  496  {
 497  497          int32_t error = 0;
 498  498          uint32_t mask = vap->va_mask;
 499  499          struct ud_inode *ip;
 500  500          timestruc_t now;
 501  501          struct vattr ovap;
 502  502  
 503  503          ud_printf("udf_setattr\n");
 504  504  
 505  505          ip = VTOI(vp);
 506  506  
 507  507          /*
 508  508           * No updates are allowed to STRAT_TYPE4096 files.
 509  509           */
 510  510          if (ip->i_astrat == STRAT_TYPE4096) {
 511  511                  return (EINVAL);
 512  512          }
 513  513  
 514  514          /*
 515  515           * Cannot set these attributes
 516  516           */
 517  517          if (mask & AT_NOSET) {
 518  518                  return (EINVAL);
 519  519          }
 520  520  
 521  521          rw_enter(&ip->i_rwlock, RW_WRITER);
 522  522          rw_enter(&ip->i_contents, RW_WRITER);
 523  523  
                   /* Current owner and mode, for the policy check to compare against. */
 524  524          ovap.va_uid = ip->i_uid;
 525  525          ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 526  526          error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
 527  527              ud_iaccess_vmode, ip);
 528  528          if (error)
 529  529                  goto update_inode;
 530  530  
                   /*
                    * NOTE(review): the mask is re-read here, presumably because
                    * secpolicy_vnode_setattr() may adjust vap->va_mask -- confirm.
                    */
 531  531          mask = vap->va_mask;
 532  532          /*
 533  533           * Change file access modes.
 534  534           */
 535  535          if (mask & AT_MODE) {
 536  536                  ip->i_perm = VA2UD_PERM(vap->va_mode);
 537  537                  ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
 538  538                  mutex_enter(&ip->i_tlock);
 539  539                  ip->i_flag |= ICHG;
 540  540                  mutex_exit(&ip->i_tlock);
 541  541          }
 542  542          if (mask & (AT_UID|AT_GID)) {
 543  543                  if (mask & AT_UID) {
 544  544                          ip->i_uid = vap->va_uid;
 545  545                  }
 546  546                  if (mask & AT_GID) {
 547  547                          ip->i_gid = vap->va_gid;
 548  548                  }
 549  549                  mutex_enter(&ip->i_tlock);
 550  550                  ip->i_flag |= ICHG;
 551  551                  mutex_exit(&ip->i_tlock);
 552  552          }
 553  553          /*
 554  554           * Truncate file.  Must have write permission and not be a directory.
 555  555           */
 556  556          if (mask & AT_SIZE) {
 557  557                  if (vp->v_type == VDIR) {
 558  558                          error = EISDIR;
 559  559                          goto update_inode;
 560  560                  }
 561  561                  if (error = ud_iaccess(ip, IWRITE, cr, 0)) {

↓ open down ↓

561 lines elided

↑ open up ↑

 562  562                          goto update_inode;
 563  563                  }
 564  564                  if (vap->va_size > MAXOFFSET_T) {
 565  565                          error = EFBIG;
 566  566                          goto update_inode;
 567  567                  }
 568  568                  if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
 569  569                          goto update_inode;
 570  570                  }
 571  571  
                           /*
                            * Emit a vnode event for the size change: a truncate
                            * event when the new size is 0, a resize event for any
                            * other new size.
                            */
 572      -                if (vap->va_size == 0)
      572 +                if (vap->va_size == 0) {
 573  573                          vnevent_truncate(vp, ct);
      574 +                } else {
      575 +                        vnevent_resize(vp, ct);
      576 +                }
 574  577          }
 575  578          /*
 576  579           * Change file access or modified times.
 577  580           */
 578  581          if (mask & (AT_ATIME|AT_MTIME)) {
 579  582                  mutex_enter(&ip->i_tlock);
 580  583                  if (mask & AT_ATIME) {
 581  584                          ip->i_atime.tv_sec = vap->va_atime.tv_sec;
 582  585                          ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
 583  586                          ip->i_flag &= ~IACC;

 584  587                  }
 585  588                  if (mask & AT_MTIME) {
                                   /*
                                    * Setting mtime also stamps ctime with the
                                    * current time, clears IUPD and ICHG, and
                                    * sets IMODTIME.
                                    */
 586  589                          ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
 587  590                          ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
 588  591                          gethrestime(&now);
 589  592                          ip->i_ctime.tv_sec = now.tv_sec;
 590  593                          ip->i_ctime.tv_nsec = now.tv_nsec;
 591  594                          ip->i_flag &= ~(IUPD|ICHG);
 592  595                          ip->i_flag |= IMODTIME;
 593  596                  }
 594  597                  ip->i_flag |= IMOD;
 595  598                  mutex_exit(&ip->i_tlock);
 596  599          }
 597  600  
 598  601  update_inode:
                   /* Reached on success and on every error after the locks are held. */
 599  602          if (curthread->t_flag & T_DONTPEND) {
 600  603                  ud_iupdat(ip, 1);
 601  604          } else {
 602  605                  ITIMES_NOLOCK(ip);
 603  606          }
 604  607          rw_exit(&ip->i_contents);
 605  608          rw_exit(&ip->i_rwlock);
 606  609  
 607  610          return (error);
 608  611  }
 609  612  
 610  613  /* ARGSUSED */
           /*
            * Access-check entry point (installed as .vop_access).
            *
            * Returns EIO when the inode has no i_udf pointer.  Otherwise mode is
            * converted with UD_UPERM2DPERM() and the decision is left to
            * ud_iaccess(), called with a final argument of 1.  flags and ct are
            * not used.
            */
 611  614  static int32_t
 612  615  udf_access(
 613  616          struct vnode *vp,
 614  617          int32_t mode,
 615  618          int32_t flags,
 616  619          struct cred *cr,
 617  620          caller_context_t *ct)
 618  621  {
 619  622          struct ud_inode *ip = VTOI(vp);
 620  623  
 621  624          ud_printf("udf_access\n");
 622  625  
 623  626          if (ip->i_udf == NULL) {
 624  627                  return (EIO);
 625  628          }
 626  629  
 627  630          return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
 628  631  }
 629  632  
 630  633  int32_t udfs_stickyhack = 1;    /* nonzero lets udf_lookup set VISSWAP on sticky, non-exec files */
 631  634  
 632  635  /* ARGSUSED */
           /*
            * Lookup entry point (installed as .vop_lookup): resolve the name nm in
            * directory dvp and return the resulting vnode through *vpp.
            *
            * An empty name yields dvp itself with an extra hold.  Otherwise the
            * DNLC is tried first; on a hit the caller's IEXEC access to the
            * directory is still checked with ud_iaccess().  On a miss
            * ud_dirlook() does the search.  If the result is a device vnode
            * (IS_DEVVP), it is replaced by the vnode returned from specvp().
            *
            * Returns 0 on success, the error from ud_iaccess() or ud_dirlook(),
            * or ENOSYS when specvp() returns NULL.
            */
 633  636  static int32_t
 634  637  udf_lookup(
 635  638          struct vnode *dvp,
 636  639          char *nm,
 637  640          struct vnode **vpp,
 638  641          struct pathname *pnp,
 639  642          int32_t flags,
 640  643          struct vnode *rdir,
 641  644          struct cred *cr,
 642  645          caller_context_t *ct,
 643  646          int *direntflags,
 644  647          pathname_t *realpnp)
 645  648  {
 646  649          int32_t error;
 647  650          struct vnode *vp;
 648  651          struct ud_inode *ip, *xip;
 649  652  
 650  653          ud_printf("udf_lookup\n");
 651  654          /*
 652  655           * Null component name is a synonym for directory being searched.
 653  656           */
 654  657          if (*nm == '\0') {
 655  658                  VN_HOLD(dvp);
 656  659                  *vpp = dvp;
 657  660                  error = 0;
 658  661                  goto out;
 659  662          }
 660  663  
 661  664          /*
 662  665           * Fast path: Check the directory name lookup cache.
 663  666           */
 664  667          ip = VTOI(dvp);
 665  668          if (vp = dnlc_lookup(dvp, nm)) {
 666  669                  /*
 667  670                   * Check accessibility of directory.
 668  671                   */
 669  672                  if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
 670  673                          VN_RELE(vp);
 671  674                  }
                           /*
                            * NOTE(review): on the error path this runs after
                            * VN_RELE(vp).  xip is only consumed when error == 0,
                            * so the value is unused in that case.
                            */
 672  675                  xip = VTOI(vp);
 673  676          } else {
 674  677                  error = ud_dirlook(ip, nm, &xip, cr, 1);
 675  678                  ITIMES(ip);
 676  679          }
 677  680  
 678  681          if (error == 0) {
 679  682                  ip = xip;
 680  683                  *vpp = ITOV(ip);
                           /*
                            * A non-directory with ISVTX set and no IEXEC
                            * permission bit is flagged VISSWAP, provided
                            * udfs_stickyhack is nonzero.
                            */
 681  684                  if ((ip->i_type != VDIR) &&
 682  685                      (ip->i_char & ISVTX) &&
 683  686                      ((ip->i_perm & IEXEC) == 0) &&
 684  687                      udfs_stickyhack) {
 685  688                          mutex_enter(&(*vpp)->v_lock);
 686  689                          (*vpp)->v_flag |= VISSWAP;
 687  690                          mutex_exit(&(*vpp)->v_lock);
 688  691                  }
 689  692                  ITIMES(ip);
 690  693                  /*
 691  694                   * If vnode is a device return special vnode instead.
 692  695                   */
 693  696                  if (IS_DEVVP(*vpp)) {
 694  697                          struct vnode *newvp;
 695  698                          newvp = specvp(*vpp, (*vpp)->v_rdev,
 696  699                              (*vpp)->v_type, cr);
 697  700                          VN_RELE(*vpp);
                                   /*
                                    * NOTE(review): when specvp() fails, *vpp is
                                    * left pointing at the vnode just released;
                                    * callers must not use it on ENOSYS.
                                    */
 698  701                          if (newvp == NULL) {
 699  702                                  error = ENOSYS;
 700  703                          } else {
 701  704                                  *vpp = newvp;
 702  705                          }
 703  706                  }
 704  707          }
 705  708  out:
 706  709          return (error);
 707  710  }
 708  711  
 709  712  /* ARGSUSED */
 710  713  static int32_t
 711  714  udf_create(
 712  715          struct vnode *dvp,
 713  716          char *name,
 714  717          struct vattr *vap,
 715  718          enum vcexcl excl,
 716  719          int32_t mode,
 717  720          struct vnode **vpp,
 718  721          struct cred *cr,
 719  722          int32_t flag,
 720  723          caller_context_t *ct,
 721  724          vsecattr_t *vsecp)
 722  725  {
 723  726          int32_t error;
 724  727          struct ud_inode *ip = VTOI(dvp), *xip;
 725  728  
 726  729          ud_printf("udf_create\n");
 727  730  
 728  731          if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
 729  732                  vap->va_mode &= ~VSVTX;
 730  733  
 731  734          if (*name == '\0') {
 732  735                  /*
 733  736                   * Null component name refers to the directory itself.
 734  737                   */
 735  738                  VN_HOLD(dvp);
 736  739                  ITIMES(ip);
 737  740                  error = EEXIST;
 738  741          } else {
 739  742                  xip = NULL;
 740  743                  rw_enter(&ip->i_rwlock, RW_WRITER);
 741  744                  error = ud_direnter(ip, name, DE_CREATE,
 742  745                      (struct ud_inode *)0, (struct ud_inode *)0,
 743  746                      vap, &xip, cr, ct);
 744  747                  rw_exit(&ip->i_rwlock);
 745  748                  ITIMES(ip);
 746  749                  ip = xip;
 747  750          }
 748  751  #ifdef  __lock_lint
 749  752          rw_enter(&ip->i_contents, RW_WRITER);
 750  753  #else
 751  754          if (ip != NULL) {
 752  755                  rw_enter(&ip->i_contents, RW_WRITER);
 753  756          }
 754  757  #endif
 755  758  
 756  759          /*
 757  760           * If the file already exists and this is a non-exclusive create,
 758  761           * check permissions and allow access for non-directories.
 759  762           * Read-only create of an existing directory is also allowed.
 760  763           * We fail an exclusive create of anything which already exists.
 761  764           */
 762  765          if (error == EEXIST) {
 763  766                  if (excl == NONEXCL) {
 764  767                          if ((ip->i_type == VDIR) && (mode & VWRITE)) {
 765  768                                  error = EISDIR;
 766  769                          } else if (mode) {
 767  770                                  error = ud_iaccess(ip,
 768  771                                      UD_UPERM2DPERM(mode), cr, 0);
 769  772                          } else {
 770  773                                  error = 0;
 771  774                          }
 772  775                  }
 773  776                  if (error) {
 774  777                          rw_exit(&ip->i_contents);
 775  778                          VN_RELE(ITOV(ip));
 776  779                          goto out;
 777  780                  } else if ((ip->i_type == VREG) &&
 778  781                      (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
 779  782                          /*
 780  783                           * Truncate regular files, if requested by caller.
 781  784                           * Grab i_rwlock to make sure no one else is
 782  785                           * currently writing to the file (we promised
 783  786                           * bmap we would do this).
 784  787                           * Must get the locks in the correct order.
 785  788                           */
 786  789                          if (ip->i_size == 0) {
 787  790                                  ip->i_flag |= ICHG | IUPD;
 788  791                          } else {
 789  792                                  rw_exit(&ip->i_contents);
 790  793                                  rw_enter(&ip->i_rwlock, RW_WRITER);
 791  794                                  rw_enter(&ip->i_contents, RW_WRITER);
 792  795                                  (void) ud_itrunc(ip, 0, 0, cr);
 793  796                                  rw_exit(&ip->i_rwlock);
 794  797                          }
 795  798                          vnevent_create(ITOV(ip), ct);
 796  799                  }
 797  800          }
 798  801  
 799  802          if (error == 0) {
 800  803                  *vpp = ITOV(ip);
 801  804                  ITIMES(ip);
 802  805          }
 803  806  #ifdef  __lock_lint
 804  807          rw_exit(&ip->i_contents);
 805  808  #else
 806  809          if (ip != NULL) {
 807  810                  rw_exit(&ip->i_contents);
 808  811          }
 809  812  #endif
 810  813          if (error) {
 811  814                  goto out;
 812  815          }
 813  816  
 814  817          /*
 815  818           * If vnode is a device return special vnode instead.
 816  819           */
 817  820          if (!error && IS_DEVVP(*vpp)) {
 818  821                  struct vnode *newvp;
 819  822  
 820  823                  newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 821  824                  VN_RELE(*vpp);
 822  825                  if (newvp == NULL) {
 823  826                          error = ENOSYS;
 824  827                          goto out;
 825  828                  }
 826  829                  *vpp = newvp;
 827  830          }
 828  831  out:
 829  832          return (error);
 830  833  }
 831  834  
 832  835  /* ARGSUSED */
 833  836  static int32_t
 834  837  udf_remove(
 835  838          struct vnode *vp,
 836  839          char *nm,
 837  840          struct cred *cr,
 838  841          caller_context_t *ct,
 839  842          int flags)
 840  843  {
 841  844          int32_t error;
 842  845          struct ud_inode *ip = VTOI(vp);
 843  846  
 844  847          ud_printf("udf_remove\n");
 845  848  
 846  849          rw_enter(&ip->i_rwlock, RW_WRITER);
 847  850          error = ud_dirremove(ip, nm,
 848  851              (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
 849  852          rw_exit(&ip->i_rwlock);
 850  853          ITIMES(ip);
 851  854  
 852  855          return (error);
 853  856  }
 854  857  
 855  858  /* ARGSUSED */
 856  859  static int32_t
 857  860  udf_link(
 858  861          struct vnode *tdvp,
 859  862          struct vnode *svp,
 860  863          char *tnm,
 861  864          struct cred *cr,
 862  865          caller_context_t *ct,
 863  866          int flags)
 864  867  {
 865  868          int32_t error;
 866  869          struct vnode *realvp;
 867  870          struct ud_inode *sip;
 868  871          struct ud_inode *tdp;
 869  872  
 870  873          ud_printf("udf_link\n");
 871  874          if (VOP_REALVP(svp, &realvp, ct) == 0) {
 872  875                  svp = realvp;
 873  876          }
 874  877  
 875  878          /*
 876  879           * Do not allow links to directories
 877  880           */
 878  881          if (svp->v_type == VDIR) {
 879  882                  return (EPERM);
 880  883          }
 881  884  
 882  885          sip = VTOI(svp);
 883  886  
 884  887          if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
 885  888                  return (EPERM);
 886  889  
 887  890          tdp = VTOI(tdvp);
 888  891  
 889  892          rw_enter(&tdp->i_rwlock, RW_WRITER);
 890  893          error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
 891  894              sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
 892  895          rw_exit(&tdp->i_rwlock);
 893  896          ITIMES(sip);
 894  897          ITIMES(tdp);
 895  898  
 896  899          if (error == 0) {
 897  900                  vnevent_link(svp, ct);
 898  901          }
 899  902  
 900  903          return (error);
 901  904  }
 902  905  
 903  906  /* ARGSUSED */
 904  907  static int32_t
 905  908  udf_rename(
 906  909          struct vnode *sdvp,
 907  910          char *snm,
 908  911          struct vnode *tdvp,
 909  912          char *tnm,
 910  913          struct cred *cr,
 911  914          caller_context_t *ct,
 912  915          int flags)
 913  916  {
 914  917          int32_t error = 0;
 915  918          struct udf_vfs *udf_vfsp;
 916  919          struct ud_inode *sip;           /* source inode */
 917  920          struct ud_inode *tip;           /* target inode */
 918  921          struct ud_inode *sdp, *tdp;     /* source and target parent inode */
 919  922          struct vnode *realvp;
 920  923  
 921  924          ud_printf("udf_rename\n");
 922  925  
 923  926          if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
 924  927                  tdvp = realvp;
 925  928          }
 926  929  
 927  930          sdp = VTOI(sdvp);
 928  931          tdp = VTOI(tdvp);
 929  932  
 930  933          udf_vfsp = sdp->i_udf;
 931  934  
 932  935          mutex_enter(&udf_vfsp->udf_rename_lck);
 933  936          /*
 934  937           * Look up inode of file we're supposed to rename.
 935  938           */
 936  939          if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
 937  940                  mutex_exit(&udf_vfsp->udf_rename_lck);
 938  941                  return (error);
 939  942          }
 940  943          /*
 941  944           * be sure this is not a directory with another file system mounted
 942  945           * over it.  If it is just give up the locks, and return with
 943  946           * EBUSY
 944  947           */
 945  948          if (vn_mountedvfs(ITOV(sip)) != NULL) {
 946  949                  error = EBUSY;
 947  950                  goto errout;
 948  951          }
 949  952          /*
 950  953           * Make sure we can delete the source entry.  This requires
 951  954           * write permission on the containing directory.  If that
 952  955           * directory is "sticky" it further requires (except for
 953  956           * privileged users) that the user own the directory or the
 954  957           * source entry, or else have permission to write the source
 955  958           * entry.
 956  959           */
 957  960          rw_enter(&sdp->i_contents, RW_READER);
 958  961          rw_enter(&sip->i_contents, RW_READER);
 959  962          if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
 960  963              (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
 961  964                  rw_exit(&sip->i_contents);
 962  965                  rw_exit(&sdp->i_contents);
 963  966                  ITIMES(sip);
 964  967                  goto errout;
 965  968          }
 966  969  
 967  970          /*
 968  971           * Check for renaming '.' or '..' or alias of '.'
 969  972           */
 970  973          if ((strcmp(snm, ".") == 0) ||
 971  974              (strcmp(snm, "..") == 0) ||
 972  975              (sdp == sip)) {
 973  976                  error = EINVAL;
 974  977                  rw_exit(&sip->i_contents);
 975  978                  rw_exit(&sdp->i_contents);
 976  979                  goto errout;
 977  980          }
 978  981  
 979  982          rw_exit(&sip->i_contents);
 980  983          rw_exit(&sdp->i_contents);
 981  984  
 982  985          if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
 983  986                  vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
 984  987                  VN_RELE(ITOV(tip));
 985  988          }
 986  989  
 987  990          /* Notify the target dir. if not the same as the source dir. */
 988  991          if (sdvp != tdvp)
 989  992                  vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
 990  993  
 991  994          vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);
 992  995  
 993  996          /*
 994  997           * Link source to the target.
 995  998           */
 996  999          rw_enter(&tdp->i_rwlock, RW_WRITER);
 997 1000          if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
 998 1001              (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
 999 1002                  /*
1000 1003                   * ESAME isn't really an error; it indicates that the
1001 1004                   * operation should not be done because the source and target
1002 1005                   * are the same file, but that no error should be reported.
1003 1006                   */
1004 1007                  if (error == ESAME) {
1005 1008                          error = 0;
1006 1009                  }
1007 1010                  rw_exit(&tdp->i_rwlock);
1008 1011                  goto errout;
1009 1012          }
1010 1013          rw_exit(&tdp->i_rwlock);
1011 1014  
1012 1015          rw_enter(&sdp->i_rwlock, RW_WRITER);
1013 1016          /*
1014 1017           * Unlink the source.
1015 1018           * Remove the source entry.  ud_dirremove() checks that the entry
1016 1019           * still reflects sip, and returns an error if it doesn't.
1017 1020           * If the entry has changed just forget about it.  Release
1018 1021           * the source inode.
1019 1022           */
1020 1023          if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1021 1024              DR_RENAME, cr, ct)) == ENOENT) {
1022 1025                  error = 0;
1023 1026          }
1024 1027          rw_exit(&sdp->i_rwlock);
1025 1028  
1026 1029          if (error == 0) {
1027 1030                  vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
1028 1031                  /*
1029 1032                   * vnevent_rename_dest and vnevent_rename_dest_dir are called
1030 1033                   * in ud_direnter().
1031 1034                   */
1032 1035          }
1033 1036  
1034 1037  errout:
1035 1038          ITIMES(sdp);
1036 1039          ITIMES(tdp);
1037 1040          VN_RELE(ITOV(sip));
1038 1041          mutex_exit(&udf_vfsp->udf_rename_lck);
1039 1042  
1040 1043          return (error);
1041 1044  }
1042 1045  
1043 1046  /* ARGSUSED */
1044 1047  static int32_t
1045 1048  udf_mkdir(
1046 1049          struct vnode *dvp,
1047 1050          char *dirname,
1048 1051          struct vattr *vap,
1049 1052          struct vnode **vpp,
1050 1053          struct cred *cr,
1051 1054          caller_context_t *ct,
1052 1055          int flags,
1053 1056          vsecattr_t *vsecp)
1054 1057  {
1055 1058          int32_t error;
1056 1059          struct ud_inode *ip;
1057 1060          struct ud_inode *xip;
1058 1061  
1059 1062          ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1060 1063  
1061 1064          ud_printf("udf_mkdir\n");
1062 1065  
1063 1066          ip = VTOI(dvp);
1064 1067          rw_enter(&ip->i_rwlock, RW_WRITER);
1065 1068          error = ud_direnter(ip, dirname, DE_MKDIR,
1066 1069              (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1067 1070          rw_exit(&ip->i_rwlock);
1068 1071          ITIMES(ip);
1069 1072          if (error == 0) {
1070 1073                  ip = xip;
1071 1074                  *vpp = ITOV(ip);
1072 1075                  ITIMES(ip);
1073 1076          } else if (error == EEXIST) {
1074 1077                  ITIMES(xip);
1075 1078                  VN_RELE(ITOV(xip));
1076 1079          }
1077 1080  
1078 1081          return (error);
1079 1082  }
1080 1083  
1081 1084  /* ARGSUSED */
1082 1085  static int32_t
1083 1086  udf_rmdir(
1084 1087          struct vnode *vp,
1085 1088          char *nm,
1086 1089          struct vnode *cdir,
1087 1090          struct cred *cr,
1088 1091          caller_context_t *ct,
1089 1092          int flags)
1090 1093  {
1091 1094          int32_t error;
1092 1095          struct ud_inode *ip = VTOI(vp);
1093 1096  
1094 1097          ud_printf("udf_rmdir\n");
1095 1098  
1096 1099          rw_enter(&ip->i_rwlock, RW_WRITER);
1097 1100          error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1098 1101              cr, ct);
1099 1102          rw_exit(&ip->i_rwlock);
1100 1103          ITIMES(ip);
1101 1104  
1102 1105          return (error);
1103 1106  }
1104 1107  
1105 1108  /* ARGSUSED */
1106 1109  static int32_t
1107 1110  udf_readdir(
1108 1111          struct vnode *vp,
1109 1112          struct uio *uiop,
1110 1113          struct cred *cr,
1111 1114          int32_t *eofp,
1112 1115          caller_context_t *ct,
1113 1116          int flags)
1114 1117  {
1115 1118          struct ud_inode *ip;
1116 1119          struct dirent64 *nd;
1117 1120          struct udf_vfs *udf_vfsp;
1118 1121          int32_t error = 0, len, outcount = 0;
1119 1122          uint32_t dirsiz, offset;
1120 1123          uint32_t bufsize, ndlen, dummy;
1121 1124          caddr_t outbuf;
1122 1125          caddr_t outb, end_outb;
1123 1126          struct iovec *iovp;
1124 1127  
1125 1128          uint8_t *dname;
1126 1129          int32_t length;
1127 1130  
1128 1131          uint8_t *buf = NULL;
1129 1132  
1130 1133          struct fbuf *fbp = NULL;
1131 1134          struct file_id *fid;
1132 1135          uint8_t *name;
1133 1136  
1134 1137  
1135 1138          ud_printf("udf_readdir\n");
1136 1139  
1137 1140          ip = VTOI(vp);
1138 1141          udf_vfsp = ip->i_udf;
1139 1142  
1140 1143          dirsiz = ip->i_size;
1141 1144          if ((uiop->uio_offset >= dirsiz) ||
1142 1145              (ip->i_nlink <= 0)) {
1143 1146                  if (eofp) {
1144 1147                          *eofp = 1;
1145 1148                  }
1146 1149                  return (0);
1147 1150          }
1148 1151  
1149 1152          offset = uiop->uio_offset;
1150 1153          iovp = uiop->uio_iov;
1151 1154          bufsize = iovp->iov_len;
1152 1155  
1153 1156          outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1154 1157          end_outb = outb + bufsize;
1155 1158          nd = (struct dirent64 *)outbuf;
1156 1159  
1157 1160          dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1158 1161          buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1159 1162  
1160 1163          if (offset == 0) {
1161 1164                  len = DIRENT64_RECLEN(1);
1162 1165                  if (((caddr_t)nd + len) >= end_outb) {
1163 1166                          error = EINVAL;
1164 1167                          goto end;
1165 1168                  }
1166 1169                  nd->d_ino = ip->i_icb_lbano;
1167 1170                  nd->d_reclen = (uint16_t)len;
1168 1171                  nd->d_off = 0x10;
1169 1172                  nd->d_name[0] = '.';
1170 1173                  bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1171 1174                  nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1172 1175                  outcount++;
1173 1176          } else if (offset == 0x10) {
1174 1177                  offset = 0;
1175 1178          }
1176 1179  
1177 1180          while (offset < dirsiz) {
1178 1181                  error = ud_get_next_fid(ip, &fbp,
1179 1182                      offset, &fid, &name, buf);
1180 1183                  if (error != 0) {
1181 1184                          break;
1182 1185                  }
1183 1186  
1184 1187                  if ((fid->fid_flags & FID_DELETED) == 0) {
1185 1188                          if (fid->fid_flags & FID_PARENT) {
1186 1189  
1187 1190                                  len = DIRENT64_RECLEN(2);
1188 1191                                  if (((caddr_t)nd + len) >= end_outb) {
1189 1192                                          error = EINVAL;
1190 1193                                          break;
1191 1194                                  }
1192 1195  
1193 1196                                  nd->d_ino = ip->i_icb_lbano;
1194 1197                                  nd->d_reclen = (uint16_t)len;
1195 1198                                  nd->d_off = offset + FID_LEN(fid);
1196 1199                                  nd->d_name[0] = '.';
1197 1200                                  nd->d_name[1] = '.';
1198 1201                                  bzero(&nd->d_name[2],
1199 1202                                      DIRENT64_NAMELEN(len) - 2);
1200 1203                                  nd = (struct dirent64 *)
1201 1204                                      ((char *)nd + nd->d_reclen);
1202 1205                          } else {
1203 1206                                  if ((error = ud_uncompress(fid->fid_idlen,
1204 1207                                      &length, name, dname)) != 0) {
1205 1208                                          break;
1206 1209                                  }
1207 1210                                  if (length == 0) {
1208 1211                                          offset += FID_LEN(fid);
1209 1212                                          continue;
1210 1213                                  }
1211 1214                                  len = DIRENT64_RECLEN(length);
1212 1215                                  if (((caddr_t)nd + len) >= end_outb) {
1213 1216                                          if (!outcount) {
1214 1217                                                  error = EINVAL;
1215 1218                                          }
1216 1219                                          break;
1217 1220                                  }
1218 1221                                  (void) strncpy(nd->d_name,
1219 1222                                      (caddr_t)dname, length);
1220 1223                                  bzero(&nd->d_name[length],
1221 1224                                      DIRENT64_NAMELEN(len) - length);
1222 1225                                  nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1223 1226                                      SWAP_16(fid->fid_icb.lad_ext_prn),
1224 1227                                      SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1225 1228                                      &dummy);
1226 1229                                  nd->d_reclen = (uint16_t)len;
1227 1230                                  nd->d_off = offset + FID_LEN(fid);
1228 1231                                  nd = (struct dirent64 *)
1229 1232                                      ((char *)nd + nd->d_reclen);
1230 1233                          }
1231 1234                          outcount++;
1232 1235                  }
1233 1236  
1234 1237                  offset += FID_LEN(fid);
1235 1238          }
1236 1239  
1237 1240  end:
1238 1241          if (fbp != NULL) {
1239 1242                  fbrelse(fbp, S_OTHER);
1240 1243          }
1241 1244          ndlen = ((char *)nd - outbuf);
1242 1245          /*
1243 1246           * In case of error do not call uiomove.
1244 1247           * Return the error to the caller.
1245 1248           */
1246 1249          if ((error == 0) && (ndlen != 0)) {
1247 1250                  error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1248 1251                  uiop->uio_offset = offset;
1249 1252          }
1250 1253          kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1251 1254          kmem_free((caddr_t)dname, 1024);
1252 1255          kmem_free(outbuf, (uint32_t)bufsize);
1253 1256          if (eofp && error == 0) {
1254 1257                  *eofp = (uiop->uio_offset >= dirsiz);
1255 1258          }
1256 1259          return (error);
1257 1260  }
1258 1261  
1259 1262  /* ARGSUSED */
1260 1263  static int32_t
1261 1264  udf_symlink(
1262 1265          struct vnode *dvp,
1263 1266          char *linkname,
1264 1267          struct vattr *vap,
1265 1268          char *target,
1266 1269          struct cred *cr,
1267 1270          caller_context_t *ct,
1268 1271          int flags)
1269 1272  {
1270 1273          int32_t error = 0, outlen;
1271 1274          uint32_t ioflag = 0;
1272 1275          struct ud_inode *ip, *dip = VTOI(dvp);
1273 1276  
1274 1277          struct path_comp *pc;
1275 1278          int8_t *dname = NULL, *uname = NULL, *sp;
1276 1279  
1277 1280          ud_printf("udf_symlink\n");
1278 1281  
1279 1282          ip = (struct ud_inode *)0;
1280 1283          vap->va_type = VLNK;
1281 1284          vap->va_rdev = 0;
1282 1285  
1283 1286          rw_enter(&dip->i_rwlock, RW_WRITER);
1284 1287          error = ud_direnter(dip, linkname, DE_CREATE,
1285 1288              (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1286 1289          rw_exit(&dip->i_rwlock);
1287 1290          if (error == 0) {
1288 1291                  dname = kmem_zalloc(1024, KM_SLEEP);
1289 1292                  uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1290 1293  
1291 1294                  pc = (struct path_comp *)uname;
1292 1295                  /*
1293 1296                   * If the first character in target is "/"
1294 1297                   * then skip it and create entry for it
1295 1298                   */
1296 1299                  if (*target == '/') {
1297 1300                          pc->pc_type = 2;
1298 1301                          pc->pc_len = 0;
1299 1302                          pc = (struct path_comp *)(((char *)pc) + 4);
1300 1303                          while (*target == '/') {
1301 1304                                  target++;
1302 1305                          }
1303 1306                  }
1304 1307  
1305 1308                  while (*target != NULL) {
1306 1309                          sp = target;
1307 1310                          while ((*target != '/') && (*target != '\0')) {
1308 1311                                  target ++;
1309 1312                          }
1310 1313                          /*
1311 1314                           * We got the next component of the
1312 1315                           * path name. Create path_comp of
1313 1316                           * appropriate type
1314 1317                           */
1315 1318                          if (((target - sp) == 1) && (*sp == '.')) {
1316 1319                                  /*
1317 1320                                   * Dot entry.
1318 1321                                   */
1319 1322                                  pc->pc_type = 4;
1320 1323                                  pc = (struct path_comp *)(((char *)pc) + 4);
1321 1324                          } else if (((target - sp) == 2) &&
1322 1325                              (*sp == '.') && ((*(sp + 1)) == '.')) {
1323 1326                                  /*
1324 1327                                   * DotDot entry.
1325 1328                                   */
1326 1329                                  pc->pc_type = 3;
1327 1330                                  pc = (struct path_comp *)(((char *)pc) + 4);
1328 1331                          } else {
1329 1332                                  /*
1330 1333                                   * convert the user given name
1331 1334                                   * into appropriate form to be put
1332 1335                                   * on the media
1333 1336                                   */
1334 1337                                  outlen = 1024;  /* set to size of dname */
1335 1338                                  if (error = ud_compress(target - sp, &outlen,
1336 1339                                      (uint8_t *)sp, (uint8_t *)dname)) {
1337 1340                                          break;
1338 1341                                  }
1339 1342                                  pc->pc_type = 5;
1340 1343                                  /* LINTED */
1341 1344                                  pc->pc_len = outlen;
1342 1345                                  dname[outlen] = '\0';
1343 1346                                  (void) strcpy((char *)pc->pc_id, dname);
1344 1347                                  pc = (struct path_comp *)
1345 1348                                      (((char *)pc) + 4 + outlen);
1346 1349                          }
1347 1350                          while (*target == '/') {
1348 1351                                  target++;
1349 1352                          }
1350 1353                          if (*target == NULL) {
1351 1354                                  break;
1352 1355                          }
1353 1356                  }
1354 1357  
1355 1358                  rw_enter(&ip->i_contents, RW_WRITER);
1356 1359                  if (error == 0) {
1357 1360                          ioflag = FWRITE;
1358 1361                          if (curthread->t_flag & T_DONTPEND) {
1359 1362                                  ioflag |= FDSYNC;
1360 1363                          }
1361 1364                          error = ud_rdwri(UIO_WRITE, ioflag, ip,
1362 1365                              uname, ((int8_t *)pc) - uname,
1363 1366                              (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1364 1367                  }
1365 1368                  if (error) {
1366 1369                          ud_idrop(ip);
1367 1370                          rw_exit(&ip->i_contents);
1368 1371                          rw_enter(&dip->i_rwlock, RW_WRITER);
1369 1372                          (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1370 1373                              (struct vnode *)0, DR_REMOVE, cr, ct);
1371 1374                          rw_exit(&dip->i_rwlock);
1372 1375                          goto update_inode;
1373 1376                  }
1374 1377                  rw_exit(&ip->i_contents);
1375 1378          }
1376 1379  
1377 1380          if ((error == 0) || (error == EEXIST)) {
1378 1381                  VN_RELE(ITOV(ip));
1379 1382          }
1380 1383  
1381 1384  update_inode:
1382 1385          ITIMES(VTOI(dvp));
1383 1386          if (uname != NULL) {
1384 1387                  kmem_free(uname, PAGESIZE);
1385 1388          }
1386 1389          if (dname != NULL) {
1387 1390                  kmem_free(dname, 1024);
1388 1391          }
1389 1392  
1390 1393          return (error);
1391 1394  }
1392 1395  
1393 1396  /* ARGSUSED */
1394 1397  static int32_t
1395 1398  udf_readlink(
1396 1399          struct vnode *vp,
1397 1400          struct uio *uiop,
1398 1401          struct cred *cr,
1399 1402          caller_context_t *ct)
1400 1403  {
1401 1404          int32_t error = 0, off, id_len, size, len;
1402 1405          int8_t *dname = NULL, *uname = NULL;
1403 1406          struct ud_inode *ip;
1404 1407          struct fbuf *fbp = NULL;
1405 1408          struct path_comp *pc;
1406 1409  
1407 1410          ud_printf("udf_readlink\n");
1408 1411  
1409 1412          if (vp->v_type != VLNK) {
1410 1413                  return (EINVAL);
1411 1414          }
1412 1415  
1413 1416          ip = VTOI(vp);
1414 1417          size = ip->i_size;
1415 1418          if (size > PAGESIZE) {
1416 1419                  return (EIO);
1417 1420          }
1418 1421  
1419 1422          if (size == 0) {
1420 1423                  return (0);
1421 1424          }
1422 1425  
1423 1426          dname = kmem_zalloc(1024, KM_SLEEP);
1424 1427          uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1425 1428  
1426 1429          rw_enter(&ip->i_contents, RW_READER);
1427 1430  
1428 1431          if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1429 1432                  goto end;
1430 1433          }
1431 1434  
1432 1435          off = 0;
1433 1436  
1434 1437          while (off < size) {
1435 1438                  pc = (struct path_comp *)(fbp->fb_addr + off);
1436 1439                  switch (pc->pc_type) {
1437 1440                          case 1 :
1438 1441                                  (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1439 1442                                  (void) strcat(uname, "/");
1440 1443                                  break;
1441 1444                          case 2 :
1442 1445                                  if (pc->pc_len != 0) {
1443 1446                                          goto end;
1444 1447                                  }
1445 1448                                  uname[0] = '/';
1446 1449                                  uname[1] = '\0';
1447 1450                                  break;
1448 1451                          case 3 :
1449 1452                                  (void) strcat(uname, "../");
1450 1453                                  break;
1451 1454                          case 4 :
1452 1455                                  (void) strcat(uname, "./");
1453 1456                                  break;
1454 1457                          case 5 :
1455 1458                                  if ((error = ud_uncompress(pc->pc_len, &id_len,
1456 1459                                      pc->pc_id, (uint8_t *)dname)) != 0) {
1457 1460                                          break;
1458 1461                                  }
1459 1462                                  dname[id_len] = '\0';
1460 1463                                  (void) strcat(uname, dname);
1461 1464                                  (void) strcat(uname, "/");
1462 1465                                  break;
1463 1466                          default :
1464 1467                                  error = EINVAL;
1465 1468                                  goto end;
1466 1469                  }
1467 1470                  off += 4 + pc->pc_len;
1468 1471          }
1469 1472          len = strlen(uname) - 1;
1470 1473          if (uname[len] == '/') {
1471 1474                  if (len == 0) {
1472 1475                          /*
1473 1476                           * special case link to /
1474 1477                           */
1475 1478                          len = 1;
1476 1479                  } else {
1477 1480                          uname[len] = '\0';
1478 1481                  }
1479 1482          }
1480 1483  
1481 1484          error = uiomove(uname, len, UIO_READ, uiop);
1482 1485  
1483 1486          ITIMES(ip);
1484 1487  
1485 1488  end:
1486 1489          if (fbp != NULL) {
1487 1490                  fbrelse(fbp, S_OTHER);
1488 1491          }
1489 1492          rw_exit(&ip->i_contents);
1490 1493          if (uname != NULL) {
1491 1494                  kmem_free(uname, PAGESIZE);
1492 1495          }
1493 1496          if (dname != NULL) {
1494 1497                  kmem_free(dname, 1024);
1495 1498          }
1496 1499          return (error);
1497 1500  }
1498 1501  
1499 1502  /* ARGSUSED */
1500 1503  static int32_t
1501 1504  udf_fsync(
1502 1505          struct vnode *vp,
1503 1506          int32_t syncflag,
1504 1507          struct cred *cr,
1505 1508          caller_context_t *ct)
1506 1509  {
1507 1510          int32_t error = 0;
1508 1511          struct ud_inode *ip = VTOI(vp);
1509 1512  
1510 1513          ud_printf("udf_fsync\n");
1511 1514  
1512 1515          rw_enter(&ip->i_contents, RW_WRITER);
1513 1516          if (!(IS_SWAPVP(vp))) {
1514 1517                  error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1515 1518          }
1516 1519          if (error == 0) {
1517 1520                  error = ud_sync_indir(ip);
1518 1521          }
1519 1522          ITIMES(ip);             /* XXX: is this necessary ??? */
1520 1523          rw_exit(&ip->i_contents);
1521 1524  
1522 1525          return (error);
1523 1526  }
1524 1527  
1525 1528  /* ARGSUSED */
1526 1529  static void
1527 1530  udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1528 1531  {
1529 1532          ud_printf("udf_iinactive\n");
1530 1533  
1531 1534          ud_iinactive(VTOI(vp), cr);
1532 1535  }
1533 1536  
1534 1537  /* ARGSUSED */
1535 1538  static int32_t
1536 1539  udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1537 1540  {
1538 1541          struct udf_fid *udfidp;
1539 1542          struct ud_inode *ip = VTOI(vp);
1540 1543  
1541 1544          ud_printf("udf_fid\n");
1542 1545  
1543 1546          if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1544 1547                  fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1545 1548                  return (ENOSPC);
1546 1549          }
1547 1550  
1548 1551          udfidp = (struct udf_fid *)fidp;
1549 1552          bzero((char *)udfidp, sizeof (struct udf_fid));
1550 1553          rw_enter(&ip->i_contents, RW_READER);
1551 1554          udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1552 1555          udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1553 1556          udfidp->udfid_prn = ip->i_icb_prn;
1554 1557          udfidp->udfid_icb_lbn = ip->i_icb_block;
1555 1558          rw_exit(&ip->i_contents);
1556 1559  
1557 1560          return (0);
1558 1561  }
1559 1562  
1560 1563  /* ARGSUSED2 */
1561 1564  static int
1562 1565  udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1563 1566  {
1564 1567          struct ud_inode *ip = VTOI(vp);
1565 1568  
1566 1569          ud_printf("udf_rwlock\n");
1567 1570  
1568 1571          if (write_lock) {
1569 1572                  rw_enter(&ip->i_rwlock, RW_WRITER);
1570 1573          } else {
1571 1574                  rw_enter(&ip->i_rwlock, RW_READER);
1572 1575          }
1573 1576  #ifdef  __lock_lint
1574 1577          rw_exit(&ip->i_rwlock);
1575 1578  #endif
1576 1579          return (write_lock);
1577 1580  }
1578 1581  
1579 1582  /* ARGSUSED */
1580 1583  static void
1581 1584  udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1582 1585  {
1583 1586          struct ud_inode *ip = VTOI(vp);
1584 1587  
1585 1588          ud_printf("udf_rwunlock\n");
1586 1589  
1587 1590  #ifdef  __lock_lint
1588 1591          rw_enter(&ip->i_rwlock, RW_WRITER);
1589 1592  #endif
1590 1593  
1591 1594          rw_exit(&ip->i_rwlock);
1592 1595  
1593 1596  }
1594 1597  
1595 1598  /* ARGSUSED */
1596 1599  static int32_t
1597 1600  udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1598 1601  {
1599 1602          return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1600 1603  }
1601 1604  
1602 1605  static int32_t
1603 1606  udf_frlock(
1604 1607          struct vnode *vp,
1605 1608          int32_t cmd,
1606 1609          struct flock64 *bfp,
1607 1610          int32_t flag,
1608 1611          offset_t offset,
1609 1612          struct flk_callback *flk_cbp,
1610 1613          cred_t *cr,
1611 1614          caller_context_t *ct)
1612 1615  {
1613 1616          struct ud_inode *ip = VTOI(vp);
1614 1617  
1615 1618          ud_printf("udf_frlock\n");
1616 1619  
1617 1620          /*
1618 1621           * If file is being mapped, disallow frlock.
1619 1622           * XXX I am not holding tlock while checking i_mapcnt because the
1620 1623           * current locking strategy drops all locks before calling fs_frlock.
1621 1624           * So, mapcnt could change before we enter fs_frlock making it
1622 1625           * meaningless to have held tlock in the first place.
1623 1626           */
1624 1627          if ((ip->i_mapcnt > 0) &&
1625 1628              (MANDLOCK(vp, ip->i_char))) {
1626 1629                  return (EAGAIN);
1627 1630          }
1628 1631  
1629 1632          return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1630 1633  }
1631 1634  
1632 1635  /*ARGSUSED6*/
1633 1636  static int32_t
1634 1637  udf_space(
1635 1638          struct vnode *vp,
1636 1639          int32_t cmd,
1637 1640          struct flock64 *bfp,
1638 1641          int32_t flag,
1639 1642          offset_t offset,
1640 1643          cred_t *cr,
1641 1644          caller_context_t *ct)

↓ open down ↓

1058 lines elided

↑ open up ↑

1642 1645  {
1643 1646          int32_t error = 0;
1644 1647  
1645 1648          ud_printf("udf_space\n");
1646 1649  
1647 1650          if (cmd != F_FREESP) {
1648 1651                  error =  EINVAL;
1649 1652          } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1650 1653                  error = ud_freesp(vp, bfp, flag, cr);
1651 1654  
1652      -                if (error == 0 && bfp->l_start == 0)
1653      -                        vnevent_truncate(vp, ct);
     1655 +                if (error == 0) {
     1656 +                        if (bfp->l_start == 0) {
     1657 +                                vnevent_truncate(vp, ct);
     1658 +                        } else {
     1659 +                                vnevent_resize(vp, ct);
     1660 +                        }
     1661 +                }
1654 1662          }
1655 1663  
1656 1664          return (error);
1657 1665  }
1658 1666  
1659 1667  /* ARGSUSED */
1660 1668  static int32_t
1661 1669  udf_getpage(
1662 1670          struct vnode *vp,
1663 1671          offset_t off,

1664 1672          size_t len,
1665 1673          uint32_t *protp,
1666 1674          struct page **plarr,
1667 1675          size_t plsz,
1668 1676          struct seg *seg,
1669 1677          caddr_t addr,
1670 1678          enum seg_rw rw,
1671 1679          struct cred *cr,
1672 1680          caller_context_t *ct)
1673 1681  {
1674 1682          struct ud_inode *ip = VTOI(vp);
1675 1683          int32_t error, has_holes, beyond_eof, seqmode, dolock;
1676 1684          int32_t pgsize = PAGESIZE;
1677 1685          struct udf_vfs *udf_vfsp = ip->i_udf;
1678 1686          page_t **pl;
1679 1687          u_offset_t pgoff, eoff, uoff;
1680 1688          krw_t rwtype;
1681 1689          caddr_t pgaddr;
1682 1690  
1683 1691          ud_printf("udf_getpage\n");
1684 1692  
1685 1693          uoff = (u_offset_t)off; /* type conversion */
1686 1694          if (protp) {
1687 1695                  *protp = PROT_ALL;
1688 1696          }
1689 1697          if (vp->v_flag & VNOMAP) {
1690 1698                  return (ENOSYS);
1691 1699          }
1692 1700          seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1693 1701  
1694 1702          rwtype = RW_READER;
1695 1703          dolock = (rw_owner(&ip->i_contents) != curthread);
1696 1704  retrylock:
1697 1705  #ifdef  __lock_lint
1698 1706          rw_enter(&ip->i_contents, rwtype);
1699 1707  #else
1700 1708          if (dolock) {
1701 1709                  rw_enter(&ip->i_contents, rwtype);
1702 1710          }
1703 1711  #endif
1704 1712  
1705 1713          /*
1706 1714           * We may be getting called as a side effect of a bmap using
1707 1715           * fbread() when the blocks might be being allocated and the
1708 1716           * size has not yet been up'ed.  In this case we want to be
1709 1717           * able to return zero pages if we get back UDF_HOLE from
1710 1718           * calling bmap for a non write case here.  We also might have
1711 1719           * to read some frags from the disk into a page if we are
1712 1720           * extending the number of frags for a given lbn in bmap().
1713 1721           */
1714 1722          beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1715 1723          if (beyond_eof && seg != segkmap) {
1716 1724  #ifdef  __lock_lint
1717 1725                  rw_exit(&ip->i_contents);
1718 1726  #else
1719 1727                  if (dolock) {
1720 1728                          rw_exit(&ip->i_contents);
1721 1729                  }
1722 1730  #endif
1723 1731                  return (EFAULT);
1724 1732          }
1725 1733  
1726 1734          /*
1727 1735           * Must hold i_contents lock throughout the call to pvn_getpages
1728 1736           * since locked pages are returned from each call to ud_getapage.
1729 1737           * Must *not* return locked pages and then try for contents lock
1730 1738           * due to lock ordering requirements (inode > page)
1731 1739           */
1732 1740  
1733 1741          has_holes = ud_bmap_has_holes(ip);
1734 1742  
1735 1743          if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1736 1744                  int32_t blk_size, count;
1737 1745                  u_offset_t offset;
1738 1746  
1739 1747                  /*
1740 1748                   * We must acquire the RW_WRITER lock in order to
1741 1749                   * call bmap_write().
1742 1750                   */
1743 1751                  if (dolock && rwtype == RW_READER) {
1744 1752                          rwtype = RW_WRITER;
1745 1753  
1746 1754                          if (!rw_tryupgrade(&ip->i_contents)) {
1747 1755  
1748 1756                                  rw_exit(&ip->i_contents);
1749 1757  
1750 1758                                  goto retrylock;
1751 1759                          }
1752 1760                  }
1753 1761  
1754 1762                  /*
1755 1763                   * May be allocating disk blocks for holes here as
1756 1764                   * a result of mmap faults. write(2) does the bmap_write
1757 1765                   * in rdip/wrip, not here. We are not dealing with frags
1758 1766                   * in this case.
1759 1767                   */
1760 1768                  offset = uoff;
1761 1769                  while ((offset < uoff + len) &&
1762 1770                      (offset < ip->i_size)) {
1763 1771                          /*
1764 1772                           * the variable "bnp" is to simplify the expression for
1765 1773                           * the compiler; just passing in &bn to bmap_write
1766 1774                           * causes a compiler "loop"
1767 1775                           */
1768 1776  
1769 1777                          blk_size = udf_vfsp->udf_lbsize;
1770 1778                          if ((offset + blk_size) > ip->i_size) {
1771 1779                                  count = ip->i_size - offset;
1772 1780                          } else {
1773 1781                                  count = blk_size;
1774 1782                          }
1775 1783                          error = ud_bmap_write(ip, offset, count, 0, cr);
1776 1784                          if (error) {
1777 1785                                  goto update_inode;
1778 1786                          }
1779 1787                          offset += count; /* XXX - make this contig */
1780 1788                  }
1781 1789          }
1782 1790  
1783 1791          /*
1784 1792           * Can be a reader from now on.
1785 1793           */
1786 1794  #ifdef  __lock_lint
1787 1795          if (rwtype == RW_WRITER) {
1788 1796                  rw_downgrade(&ip->i_contents);
1789 1797          }
1790 1798  #else
1791 1799          if (dolock && rwtype == RW_WRITER) {
1792 1800                  rw_downgrade(&ip->i_contents);
1793 1801          }
1794 1802  #endif
1795 1803  
1796 1804          /*
1797 1805           * We remove PROT_WRITE in cases when the file has UDF holes
1798 1806           * because we don't want to call bmap_read() to check each
1799 1807           * page if it is backed with a disk block.
1800 1808           */
1801 1809          if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1802 1810                  *protp &= ~PROT_WRITE;
1803 1811          }
1804 1812  
1805 1813          error = 0;
1806 1814  
1807 1815          /*
1808 1816           * The loop looks up pages in the range <off, off + len).
1809 1817           * For each page, we first check if we should initiate an asynchronous
1810 1818           * read ahead before we call page_lookup (we may sleep in page_lookup
1811 1819           * for a previously initiated disk read).
1812 1820           */
1813 1821          eoff = (uoff + len);
1814 1822          for (pgoff = uoff, pgaddr = addr, pl = plarr;
1815 1823              pgoff < eoff; /* empty */) {
1816 1824                  page_t  *pp;
1817 1825                  u_offset_t      nextrio;
1818 1826                  se_t    se;
1819 1827  
1820 1828                  se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1821 1829  
1822 1830                  /*
1823 1831                   * Handle async getpage (faultahead)
1824 1832                   */
1825 1833                  if (plarr == NULL) {
1826 1834                          ip->i_nextrio = pgoff;
1827 1835                          ud_getpage_ra(vp, pgoff, seg, pgaddr);
1828 1836                          pgoff += pgsize;
1829 1837                          pgaddr += pgsize;
1830 1838                          continue;
1831 1839                  }
1832 1840  
1833 1841                  /*
1834 1842                   * Check if we should initiate read ahead of next cluster.
1835 1843                   * We call page_exists only when we need to confirm that
1836 1844                   * we have the current page before we initiate the read ahead.
1837 1845                   */
1838 1846                  nextrio = ip->i_nextrio;
1839 1847                  if (seqmode &&
1840 1848                      pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1841 1849                      nextrio < ip->i_size && page_exists(vp, pgoff))
1842 1850                          ud_getpage_ra(vp, pgoff, seg, pgaddr);
1843 1851  
1844 1852                  if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1845 1853  
1846 1854                          /*
1847 1855                           * We found the page in the page cache.
1848 1856                           */
1849 1857                          *pl++ = pp;
1850 1858                          pgoff += pgsize;
1851 1859                          pgaddr += pgsize;
1852 1860                          len -= pgsize;
1853 1861                          plsz -= pgsize;
1854 1862                  } else  {
1855 1863  
1856 1864                          /*
1857 1865                           * We have to create the page, or read it from disk.
1858 1866                           */
1859 1867                          if (error = ud_getpage_miss(vp, pgoff, len,
1860 1868                              seg, pgaddr, pl, plsz, rw, seqmode)) {
1861 1869                                  goto error_out;
1862 1870                          }
1863 1871  
1864 1872                          while (*pl != NULL) {
1865 1873                                  pl++;
1866 1874                                  pgoff += pgsize;
1867 1875                                  pgaddr += pgsize;
1868 1876                                  len -= pgsize;
1869 1877                                  plsz -= pgsize;
1870 1878                          }
1871 1879                  }
1872 1880          }
1873 1881  
1874 1882          /*
1875 1883           * Return pages up to plsz if they are in the page cache.
1876 1884           * We cannot return pages if there is a chance that they are
1877 1885           * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1878 1886           */
1879 1887          if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1880 1888  
1881 1889                  ASSERT((protp == NULL) ||
1882 1890                      !(has_holes && (*protp & PROT_WRITE)));
1883 1891  
1884 1892                  eoff = pgoff + plsz;
1885 1893                  while (pgoff < eoff) {
1886 1894                          page_t          *pp;
1887 1895  
1888 1896                          if ((pp = page_lookup_nowait(vp, pgoff,
1889 1897                              SE_SHARED)) == NULL)
1890 1898                                  break;
1891 1899  
1892 1900                          *pl++ = pp;
1893 1901                          pgoff += pgsize;
1894 1902                          plsz -= pgsize;
1895 1903                  }
1896 1904          }
1897 1905  
1898 1906          if (plarr)
1899 1907                  *pl = NULL;                     /* Terminate page list */
1900 1908          ip->i_nextr = pgoff;
1901 1909  
1902 1910  error_out:
1903 1911          if (error && plarr) {
1904 1912                  /*
1905 1913                   * Release any pages we have locked.
1906 1914                   */
1907 1915                  while (pl > &plarr[0])
1908 1916                          page_unlock(*--pl);
1909 1917  
1910 1918                  plarr[0] = NULL;
1911 1919          }
1912 1920  
1913 1921  update_inode:
1914 1922  #ifdef  __lock_lint
1915 1923          rw_exit(&ip->i_contents);
1916 1924  #else
1917 1925          if (dolock) {
1918 1926                  rw_exit(&ip->i_contents);
1919 1927          }
1920 1928  #endif
1921 1929  
1922 1930          /*
1923 1931           * If the inode is not already marked for IACC (in rwip() for read)
1924 1932           * and the inode is not marked for no access time update (in rwip()
1925 1933           * for write) then update the inode access time and mod time now.
1926 1934           */
1927 1935          mutex_enter(&ip->i_tlock);
1928 1936          if ((ip->i_flag & (IACC | INOACC)) == 0) {
1929 1937                  if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1930 1938                          ip->i_flag |= IACC;
1931 1939                  }
1932 1940                  if (rw == S_WRITE) {
1933 1941                          ip->i_flag |= IUPD;
1934 1942                  }
1935 1943                  ITIMES_NOLOCK(ip);
1936 1944          }
1937 1945          mutex_exit(&ip->i_tlock);
1938 1946  
1939 1947          return (error);
1940 1948  }
1941 1949  
1942 1950  int32_t ud_delay = 1;
1943 1951  
1944 1952  /* ARGSUSED */
1945 1953  static int32_t
1946 1954  udf_putpage(
1947 1955          struct vnode *vp,
1948 1956          offset_t off,
1949 1957          size_t len,
1950 1958          int32_t flags,
1951 1959          struct cred *cr,
1952 1960          caller_context_t *ct)
1953 1961  {
1954 1962          struct ud_inode *ip;
1955 1963          int32_t error = 0;
1956 1964  
1957 1965          ud_printf("udf_putpage\n");
1958 1966  
1959 1967          ip = VTOI(vp);
1960 1968  #ifdef  __lock_lint
1961 1969          rw_enter(&ip->i_contents, RW_WRITER);
1962 1970  #endif
1963 1971  
1964 1972          if (vp->v_count == 0) {
1965 1973                  cmn_err(CE_WARN, "ud_putpage : bad v_count");
1966 1974                  error = EINVAL;
1967 1975                  goto out;
1968 1976          }
1969 1977  
1970 1978          if (vp->v_flag & VNOMAP) {
1971 1979                  error = ENOSYS;
1972 1980                  goto out;
1973 1981          }
1974 1982  
1975 1983          if (flags & B_ASYNC) {
1976 1984                  if (ud_delay && len &&
1977 1985                      (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1978 1986                          mutex_enter(&ip->i_tlock);
1979 1987  
1980 1988                          /*
1981 1989                           * If nobody stalled, start a new cluster.
1982 1990                           */
1983 1991                          if (ip->i_delaylen == 0) {
1984 1992                                  ip->i_delayoff = off;
1985 1993                                  ip->i_delaylen = len;
1986 1994                                  mutex_exit(&ip->i_tlock);
1987 1995                                  goto out;
1988 1996                          }
1989 1997  
1990 1998                          /*
1991 1999                           * If we have a full cluster or they are not contig,
1992 2000                           * then push last cluster and start over.
1993 2001                           */
1994 2002                          if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1995 2003                              ip->i_delayoff + ip->i_delaylen != off) {
1996 2004                                  u_offset_t doff;
1997 2005                                  size_t dlen;
1998 2006  
1999 2007                                  doff = ip->i_delayoff;
2000 2008                                  dlen = ip->i_delaylen;
2001 2009                                  ip->i_delayoff = off;
2002 2010                                  ip->i_delaylen = len;
2003 2011                                  mutex_exit(&ip->i_tlock);
2004 2012                                  error = ud_putpages(vp, doff, dlen, flags, cr);
2005 2013                                  /* LMXXX - flags are new val, not old */
2006 2014                                  goto out;
2007 2015                          }
2008 2016  
2009 2017                          /*
2010 2018                           * There is something there, it's not full, and
2011 2019                           * it is contig.
2012 2020                           */
2013 2021                          ip->i_delaylen += len;
2014 2022                          mutex_exit(&ip->i_tlock);
2015 2023                          goto out;
2016 2024                  }
2017 2025  
2018 2026                  /*
2019 2027                   * Must have weird flags or we are not clustering.
2020 2028                   */
2021 2029          }
2022 2030  
2023 2031          error = ud_putpages(vp, off, len, flags, cr);
2024 2032  
2025 2033  out:
2026 2034  #ifdef  __lock_lint
2027 2035          rw_exit(&ip->i_contents);
2028 2036  #endif
2029 2037          return (error);
2030 2038  }
2031 2039  
2032 2040  /* ARGSUSED */
2033 2041  static int32_t
2034 2042  udf_map(
2035 2043          struct vnode *vp,
2036 2044          offset_t off,
2037 2045          struct as *as,
2038 2046          caddr_t *addrp,
2039 2047          size_t len,
2040 2048          uint8_t prot,
2041 2049          uint8_t maxprot,
2042 2050          uint32_t flags,
2043 2051          struct cred *cr,
2044 2052          caller_context_t *ct)
2045 2053  {
2046 2054          struct segvn_crargs vn_a;
2047 2055          int32_t error = 0;
2048 2056  
2049 2057          ud_printf("udf_map\n");
2050 2058  
2051 2059          if (vp->v_flag & VNOMAP) {
2052 2060                  error = ENOSYS;
2053 2061                  goto end;
2054 2062          }
2055 2063  
2056 2064          if ((off < (offset_t)0) ||
2057 2065              ((off + len) < (offset_t)0)) {
2058 2066                  error = EINVAL;
2059 2067                  goto end;
2060 2068          }
2061 2069  
2062 2070          if (vp->v_type != VREG) {
2063 2071                  error = ENODEV;
2064 2072                  goto end;
2065 2073          }
2066 2074  
2067 2075          /*
2068 2076           * If file is being locked, disallow mapping.
2069 2077           */
2070 2078          if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2071 2079                  error = EAGAIN;
2072 2080                  goto end;
2073 2081          }
2074 2082  
2075 2083          as_rangelock(as);
2076 2084          error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2077 2085          if (error != 0) {
2078 2086                  as_rangeunlock(as);
2079 2087                  goto end;
2080 2088          }
2081 2089  
2082 2090          vn_a.vp = vp;
2083 2091          vn_a.offset = off;
2084 2092          vn_a.type = flags & MAP_TYPE;
2085 2093          vn_a.prot = prot;
2086 2094          vn_a.maxprot = maxprot;
2087 2095          vn_a.cred = cr;
2088 2096          vn_a.amp = NULL;
2089 2097          vn_a.flags = flags & ~MAP_TYPE;
2090 2098          vn_a.szc = 0;
2091 2099          vn_a.lgrp_mem_policy_flags = 0;
2092 2100  
2093 2101          error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2094 2102          as_rangeunlock(as);
2095 2103  
2096 2104  end:
2097 2105          return (error);
2098 2106  }
2099 2107  
2100 2108  /* ARGSUSED */
2101 2109  static int32_t
2102 2110  udf_addmap(struct vnode *vp,
2103 2111          offset_t off,
2104 2112          struct as *as,
2105 2113          caddr_t addr,
2106 2114          size_t len,
2107 2115          uint8_t prot,
2108 2116          uint8_t maxprot,
2109 2117          uint32_t flags,
2110 2118          struct cred *cr,
2111 2119          caller_context_t *ct)
2112 2120  {
2113 2121          struct ud_inode *ip = VTOI(vp);
2114 2122  
2115 2123          ud_printf("udf_addmap\n");
2116 2124  
2117 2125          if (vp->v_flag & VNOMAP) {
2118 2126                  return (ENOSYS);
2119 2127          }
2120 2128  
2121 2129          mutex_enter(&ip->i_tlock);
2122 2130          ip->i_mapcnt += btopr(len);
2123 2131          mutex_exit(&ip->i_tlock);
2124 2132  
2125 2133          return (0);
2126 2134  }
2127 2135  
2128 2136  /* ARGSUSED */
2129 2137  static int32_t
2130 2138  udf_delmap(
2131 2139          struct vnode *vp, offset_t off,
2132 2140          struct as *as,
2133 2141          caddr_t addr,
2134 2142          size_t len,
2135 2143          uint32_t prot,
2136 2144          uint32_t maxprot,
2137 2145          uint32_t flags,
2138 2146          struct cred *cr,
2139 2147          caller_context_t *ct)
2140 2148  {
2141 2149          struct ud_inode *ip = VTOI(vp);
2142 2150  
2143 2151          ud_printf("udf_delmap\n");
2144 2152  
2145 2153          if (vp->v_flag & VNOMAP) {
2146 2154                  return (ENOSYS);
2147 2155          }
2148 2156  
2149 2157          mutex_enter(&ip->i_tlock);
2150 2158          ip->i_mapcnt -= btopr(len);     /* Count released mappings */
2151 2159          ASSERT(ip->i_mapcnt >= 0);
2152 2160          mutex_exit(&ip->i_tlock);
2153 2161  
2154 2162          return (0);
2155 2163  }
2156 2164  
2157 2165  /* ARGSUSED */
2158 2166  static int32_t
2159 2167  udf_l_pathconf(
2160 2168          struct vnode *vp,
2161 2169          int32_t cmd,
2162 2170          ulong_t *valp,
2163 2171          struct cred *cr,
2164 2172          caller_context_t *ct)
2165 2173  {
2166 2174          int32_t error = 0;
2167 2175  
2168 2176          ud_printf("udf_l_pathconf\n");
2169 2177  
2170 2178          if (cmd == _PC_FILESIZEBITS) {
2171 2179                  /*
2172 2180                   * udf supports 64 bits as file size
2173 2181                   * but there are several other restrictions
2174 2182                   * it only supports 32-bit block numbers and
2175 2183                   * daddr32_t is only an int32_t so taking these
2176 2184                   * into account we can stay just as where ufs is
2177 2185                   */
2178 2186                  *valp = 41;
2179 2187          } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2180 2188                  /* nanosecond timestamp resolution */
2181 2189                  *valp = 1L;
2182 2190          } else {
2183 2191                  error = fs_pathconf(vp, cmd, valp, cr, ct);
2184 2192          }
2185 2193  
2186 2194          return (error);
2187 2195  }
2188 2196  
2189 2197  uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2190 2198  #ifndef __lint
2191 2199  _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2192 2200  _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2193 2201  #endif
2194 2202  /*
2195 2203   * Assumption is that there will not be a pageio request
2196 2204   * to an embedded file
2197 2205   */
2198 2206  /* ARGSUSED */
2199 2207  static int32_t
2200 2208  udf_pageio(
2201 2209          struct vnode *vp,
2202 2210          struct page *pp,
2203 2211          u_offset_t io_off,
2204 2212          size_t io_len,
2205 2213          int32_t flags,
2206 2214          struct cred *cr,
2207 2215          caller_context_t *ct)
2208 2216  {
2209 2217          daddr_t bn;
2210 2218          struct buf *bp;
2211 2219          struct ud_inode *ip = VTOI(vp);
2212 2220          int32_t dolock, error = 0, contig, multi_io;
2213 2221          size_t done_len = 0, cur_len = 0;
2214 2222          page_t *npp = NULL, *opp = NULL, *cpp = pp;
2215 2223  
2216 2224          if (pp == NULL) {
2217 2225                  return (EINVAL);
2218 2226          }
2219 2227  
2220 2228          dolock = (rw_owner(&ip->i_contents) != curthread);
2221 2229  
2222 2230          /*
2223 2231           * We need a better check.  Ideally, we would use another
2224 2232           * vnodeops so that hlocked and forcibly unmounted file
2225 2233           * systems would return EIO where appropriate and w/o the
2226 2234           * need for these checks.
2227 2235           */
2228 2236          if (ip->i_udf == NULL) {
2229 2237                  return (EIO);
2230 2238          }
2231 2239  
2232 2240  #ifdef  __lock_lint
2233 2241          rw_enter(&ip->i_contents, RW_READER);
2234 2242  #else
2235 2243          if (dolock) {
2236 2244                  rw_enter(&ip->i_contents, RW_READER);
2237 2245          }
2238 2246  #endif
2239 2247  
2240 2248          /*
2241 2249           * Break the io request into chunks, one for each contiguous
2242 2250           * stretch of disk blocks in the target file.
2243 2251           */
2244 2252          while (done_len < io_len) {
2245 2253                  ASSERT(cpp);
2246 2254                  bp = NULL;
2247 2255                  contig = 0;
2248 2256                  if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2249 2257                      &bn, &contig)) {
2250 2258                          break;
2251 2259                  }
2252 2260  
2253 2261                  if (bn == UDF_HOLE) {   /* No holey swapfiles */
2254 2262                          cmn_err(CE_WARN, "SWAP file has HOLES");
2255 2263                          error = EINVAL;
2256 2264                          break;
2257 2265                  }
2258 2266  
2259 2267                  cur_len = MIN(io_len - done_len, contig);
2260 2268  
2261 2269                  /*
2262 2270                   * Check if more than one I/O is
2263 2271                   * required to complete the given
2264 2272                   * I/O operation
2265 2273                   */
2266 2274                  if (ip->i_udf->udf_lbsize < PAGESIZE) {
2267 2275                          if (cur_len >= PAGESIZE) {
2268 2276                                  multi_io = 0;
2269 2277                                  cur_len &= PAGEMASK;
2270 2278                          } else {
2271 2279                                  multi_io = 1;
2272 2280                                  cur_len = MIN(io_len - done_len, PAGESIZE);
2273 2281                          }
2274 2282                  }
2275 2283                  page_list_break(&cpp, &npp, btop(cur_len));
2276 2284  
2277 2285                  bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2278 2286                  ASSERT(bp != NULL);
2279 2287  
2280 2288                  bp->b_edev = ip->i_dev;
2281 2289                  bp->b_dev = cmpdev(ip->i_dev);
2282 2290                  bp->b_blkno = bn;
2283 2291                  bp->b_un.b_addr = (caddr_t)0;
2284 2292                  bp->b_file = vp;
2285 2293                  bp->b_offset = (offset_t)(io_off + done_len);
2286 2294  
2287 2295  /*
2288 2296   *              ub.ub_pageios.value.ul++;
2289 2297   */
2290 2298                  if (multi_io == 0) {
2291 2299                          (void) bdev_strategy(bp);
2292 2300                  } else {
2293 2301                          error = ud_multi_strat(ip, cpp, bp,
2294 2302                              (u_offset_t)(io_off + done_len));
2295 2303                          if (error != 0) {
2296 2304                                  pageio_done(bp);
2297 2305                                  break;
2298 2306                          }
2299 2307                  }
2300 2308                  if (flags & B_READ) {
2301 2309                          ud_pageio_reads++;
2302 2310                  } else {
2303 2311                          ud_pageio_writes++;
2304 2312                  }
2305 2313  
2306 2314                  /*
2307 2315                   * If the request is not B_ASYNC, wait for i/o to complete
2308 2316                   * and re-assemble the page list to return to the caller.
2309 2317                   * If it is B_ASYNC we leave the page list in pieces and
2310 2318                   * cleanup() will dispose of them.
2311 2319                   */
2312 2320                  if ((flags & B_ASYNC) == 0) {
2313 2321                          error = biowait(bp);
2314 2322                          pageio_done(bp);
2315 2323                          if (error) {
2316 2324                                  break;
2317 2325                          }
2318 2326                          page_list_concat(&opp, &cpp);
2319 2327                  }
2320 2328                  cpp = npp;
2321 2329                  npp = NULL;
2322 2330                  done_len += cur_len;
2323 2331          }
2324 2332  
2325 2333          ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2326 2334          if (error) {
2327 2335                  if (flags & B_ASYNC) {
2328 2336                          /* Cleanup unprocessed parts of list */
2329 2337                          page_list_concat(&cpp, &npp);
2330 2338                          if (flags & B_READ) {
2331 2339                                  pvn_read_done(cpp, B_ERROR);
2332 2340                          } else {
2333 2341                                  pvn_write_done(cpp, B_ERROR);
2334 2342                          }
2335 2343                  } else {
2336 2344                          /* Re-assemble list and let caller clean up */
2337 2345                          page_list_concat(&opp, &cpp);
2338 2346                          page_list_concat(&opp, &npp);
2339 2347                  }
2340 2348          }
2341 2349  
2342 2350  #ifdef  __lock_lint
2343 2351          rw_exit(&ip->i_contents);
2344 2352  #else
2345 2353          if (dolock) {
2346 2354                  rw_exit(&ip->i_contents);
2347 2355          }
2348 2356  #endif
2349 2357          return (error);
2350 2358  }
2351 2359  
2352 2360  
2353 2361  
2354 2362  
2355 2363  /* -------------------- local functions --------------------------- */
2356 2364  
2357 2365  
2358 2366  
2359 2367  int32_t
2360 2368  ud_rdwri(enum uio_rw rw, int32_t ioflag,
2361 2369          struct ud_inode *ip, caddr_t base, int32_t len,
2362 2370          offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2363 2371  {
2364 2372          int32_t error;
2365 2373          struct uio auio;
2366 2374          struct iovec aiov;
2367 2375  
2368 2376          ud_printf("ud_rdwri\n");
2369 2377  
2370 2378          bzero((caddr_t)&auio, sizeof (uio_t));
2371 2379          bzero((caddr_t)&aiov, sizeof (iovec_t));
2372 2380  
2373 2381          aiov.iov_base = base;
2374 2382          aiov.iov_len = len;
2375 2383          auio.uio_iov = &aiov;
2376 2384          auio.uio_iovcnt = 1;
2377 2385          auio.uio_loffset = offset;
2378 2386          auio.uio_segflg = (int16_t)seg;
2379 2387          auio.uio_resid = len;
2380 2388  
2381 2389          if (rw == UIO_WRITE) {
2382 2390                  auio.uio_fmode = FWRITE;
2383 2391                  auio.uio_extflg = UIO_COPY_DEFAULT;
2384 2392                  auio.uio_llimit = curproc->p_fsz_ctl;
2385 2393                  error = ud_wrip(ip, &auio, ioflag, cr);
2386 2394          } else {
2387 2395                  auio.uio_fmode = FREAD;
2388 2396                  auio.uio_extflg = UIO_COPY_CACHED;
2389 2397                  auio.uio_llimit = MAXOFFSET_T;
2390 2398                  error = ud_rdip(ip, &auio, ioflag, cr);
2391 2399          }
2392 2400  
2393 2401          if (aresid) {
2394 2402                  *aresid = auio.uio_resid;
2395 2403          } else if (auio.uio_resid) {
2396 2404                  error = EIO;
2397 2405          }
2398 2406          return (error);
2399 2407  }
2400 2408  
/*
 * Free-behind tunables ("free behind hacks": the pager is busted).
 * XXX - need to pass the information down to writedone() in a flag like
 * B_SEQ or B_FREE_IF_TIGHT_ON_MEMORY.
 *
 * ud_rdip() only considers freeing pages behind a read when
 * ud_freebehind is non-zero and the current offset is greater than
 * ud_smallfile.
 */
int32_t ud_smallfile = 32 * 1024;       /* offset threshold, in bytes */
int32_t ud_freebehind = 1;              /* non-zero enables free-behind */
2408 2416  
2409 2417  /* ARGSUSED */
2410 2418  int32_t
2411 2419  ud_getpage_miss(struct vnode *vp, u_offset_t off,
2412 2420          size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2413 2421          size_t plsz, enum seg_rw rw, int32_t seq)
2414 2422  {
2415 2423          struct ud_inode *ip = VTOI(vp);
2416 2424          int32_t err = 0;
2417 2425          size_t io_len;
2418 2426          u_offset_t io_off;
2419 2427          u_offset_t pgoff;
2420 2428          page_t *pp;
2421 2429  
2422 2430          pl[0] = NULL;
2423 2431  
2424 2432          /*
2425 2433           * Figure out whether the page can be created, or must be
2426 2434           * read from the disk
2427 2435           */
2428 2436          if (rw == S_CREATE) {
2429 2437                  if ((pp = page_create_va(vp, off,
2430 2438                      PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2431 2439                          cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2432 2440                          return (EINVAL);
2433 2441                  }
2434 2442                  io_len = PAGESIZE;
2435 2443          } else {
2436 2444                  pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2437 2445                      &io_len, off, PAGESIZE, 0);
2438 2446  
2439 2447                  /*
2440 2448                   * Some other thread has entered the page.
2441 2449                   * ud_getpage will retry page_lookup.
2442 2450                   */
2443 2451                  if (pp == NULL) {
2444 2452                          return (0);
2445 2453                  }
2446 2454  
2447 2455                  /*
2448 2456                   * Fill the page with as much data as we can from the file.
2449 2457                   */
2450 2458                  err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2451 2459                  if (err) {
2452 2460                          pvn_read_done(pp, B_ERROR);
2453 2461                          return (err);
2454 2462                  }
2455 2463  
2456 2464                  /*
2457 2465                   * XXX ??? ufs has io_len instead of pgoff below
2458 2466                   */
2459 2467                  ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2460 2468  
2461 2469                  /*
2462 2470                   * If the file access is sequential, initiate read ahead
2463 2471                   * of the next cluster.
2464 2472                   */
2465 2473                  if (seq && ip->i_nextrio < ip->i_size) {
2466 2474                          ud_getpage_ra(vp, off, seg, addr);
2467 2475                  }
2468 2476          }
2469 2477  
2470 2478  outmiss:
2471 2479          pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2472 2480          return (err);
2473 2481  }
2474 2482  
2475 2483  /* ARGSUSED */
2476 2484  void
2477 2485  ud_getpage_ra(struct vnode *vp,
2478 2486          u_offset_t off, struct seg *seg, caddr_t addr)
2479 2487  {
2480 2488          page_t *pp;
2481 2489          size_t io_len;
2482 2490          struct ud_inode *ip = VTOI(vp);
2483 2491          u_offset_t io_off = ip->i_nextrio, pgoff;
2484 2492          caddr_t addr2 = addr + (io_off - off);
2485 2493          daddr_t bn;
2486 2494          int32_t contig = 0;
2487 2495  
2488 2496          /*
2489 2497           * Is this test needed?
2490 2498           */
2491 2499  
2492 2500          if (addr2 >= seg->s_base + seg->s_size) {
2493 2501                  return;
2494 2502          }
2495 2503  
2496 2504          contig = 0;
2497 2505          if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2498 2506                  return;
2499 2507          }
2500 2508  
2501 2509          pp = pvn_read_kluster(vp, io_off, seg, addr2,
2502 2510              &io_off, &io_len, io_off, PAGESIZE, 1);
2503 2511  
2504 2512          /*
2505 2513           * Some other thread has entered the page.
2506 2514           * So no read head done here (ie we will have to and wait
2507 2515           * for the read when needed).
2508 2516           */
2509 2517  
2510 2518          if (pp == NULL) {
2511 2519                  return;
2512 2520          }
2513 2521  
2514 2522          (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2515 2523          ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2516 2524  }
2517 2525  
2518 2526  int
2519 2527  ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2520 2528          uint32_t bflgs, u_offset_t *pg_off)
2521 2529  {
2522 2530          daddr_t bn;
2523 2531          struct buf *bp;
2524 2532          caddr_t kaddr, caddr;
2525 2533          int32_t error = 0, contig = 0, multi_io = 0;
2526 2534          int32_t lbsize = ip->i_udf->udf_lbsize;
2527 2535          int32_t lbmask = ip->i_udf->udf_lbmask;
2528 2536          uint64_t isize;
2529 2537  
2530 2538          isize = (ip->i_size + lbmask) & (~lbmask);
2531 2539          if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2532 2540  
2533 2541                  /*
2534 2542                   * Embedded file read file_entry
2535 2543                   * from buffer cache and copy the required
2536 2544                   * portions
2537 2545                   */
2538 2546                  bp = ud_bread(ip->i_dev,
2539 2547                      ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2540 2548                  if ((bp->b_error == 0) &&
2541 2549                      (bp->b_resid == 0)) {
2542 2550  
2543 2551                          caddr = bp->b_un.b_addr + ip->i_data_off;
2544 2552  
2545 2553                          /*
2546 2554                           * mapin to kvm
2547 2555                           */
2548 2556                          kaddr = (caddr_t)ppmapin(pp,
2549 2557                              PROT_READ | PROT_WRITE, (caddr_t)-1);
2550 2558                          (void) kcopy(caddr, kaddr, ip->i_size);
2551 2559  
2552 2560                          /*
2553 2561                           * mapout of kvm
2554 2562                           */
2555 2563                          ppmapout(kaddr);
2556 2564                  }
2557 2565                  brelse(bp);
2558 2566                  contig = ip->i_size;
2559 2567          } else {
2560 2568  
2561 2569                  /*
2562 2570                   * Get the continuous size and block number
2563 2571                   * at offset "off"
2564 2572                   */
2565 2573                  if (error = ud_bmap_read(ip, off, &bn, &contig))
2566 2574                          goto out;
2567 2575                  contig = MIN(contig, PAGESIZE);
2568 2576                  contig = (contig + lbmask) & (~lbmask);
2569 2577  
2570 2578                  /*
2571 2579                   * Zero part of the page which we are not
2572 2580                   * going to read from the disk.
2573 2581                   */
2574 2582  
2575 2583                  if (bn == UDF_HOLE) {
2576 2584  
2577 2585                          /*
2578 2586                           * This is a HOLE. Just zero out
2579 2587                           * the page
2580 2588                           */
2581 2589                          if (((off + contig) == isize) ||
2582 2590                              (contig == PAGESIZE)) {
2583 2591                                  pagezero(pp->p_prev, 0, PAGESIZE);
2584 2592                                  goto out;
2585 2593                          }
2586 2594                  }
2587 2595  
2588 2596                  if (contig < PAGESIZE) {
2589 2597                          uint64_t count;
2590 2598  
2591 2599                          count = isize - off;
2592 2600                          if (contig != count) {
2593 2601                                  multi_io = 1;
2594 2602                                  contig = (int32_t)(MIN(count, PAGESIZE));
2595 2603                          } else {
2596 2604                                  pagezero(pp->p_prev, contig, PAGESIZE - contig);
2597 2605                          }
2598 2606                  }
2599 2607  
2600 2608                  /*
2601 2609                   * Get a bp and initialize it
2602 2610                   */
2603 2611                  bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2604 2612                  ASSERT(bp != NULL);
2605 2613  
2606 2614                  bp->b_edev = ip->i_dev;
2607 2615                  bp->b_dev = cmpdev(ip->i_dev);
2608 2616                  bp->b_blkno = bn;
2609 2617                  bp->b_un.b_addr = 0;
2610 2618                  bp->b_file = ip->i_vnode;
2611 2619  
2612 2620                  /*
2613 2621                   * Start I/O
2614 2622                   */
2615 2623                  if (multi_io == 0) {
2616 2624  
2617 2625                          /*
2618 2626                           * Single I/O is sufficient for this page
2619 2627                           */
2620 2628                          (void) bdev_strategy(bp);
2621 2629                  } else {
2622 2630  
2623 2631                          /*
2624 2632                           * We need to do the I/O in
2625 2633                           * piece's
2626 2634                           */
2627 2635                          error = ud_multi_strat(ip, pp, bp, off);
2628 2636                          if (error != 0) {
2629 2637                                  goto out;
2630 2638                          }
2631 2639                  }
2632 2640                  if ((bflgs & B_ASYNC) == 0) {
2633 2641  
2634 2642                          /*
2635 2643                           * Wait for i/o to complete.
2636 2644                           */
2637 2645  
2638 2646                          error = biowait(bp);
2639 2647                          pageio_done(bp);
2640 2648                          if (error) {
2641 2649                                  goto out;
2642 2650                          }
2643 2651                  }
2644 2652          }
2645 2653          if ((off + contig) >= ip->i_size) {
2646 2654                  contig = ip->i_size - off;
2647 2655          }
2648 2656  
2649 2657  out:
2650 2658          *pg_off = contig;
2651 2659          return (error);
2652 2660  }
2653 2661  
2654 2662  int32_t
2655 2663  ud_putpages(struct vnode *vp, offset_t off,
2656 2664          size_t len, int32_t flags, struct cred *cr)
2657 2665  {
2658 2666          struct ud_inode *ip;
2659 2667          page_t *pp;
2660 2668          u_offset_t io_off;
2661 2669          size_t io_len;
2662 2670          u_offset_t eoff;
2663 2671          int32_t err = 0;
2664 2672          int32_t dolock;
2665 2673  
2666 2674          ud_printf("ud_putpages\n");
2667 2675  
2668 2676          if (vp->v_count == 0) {
2669 2677                  cmn_err(CE_WARN, "ud_putpages: bad v_count");
2670 2678                  return (EINVAL);
2671 2679          }
2672 2680  
2673 2681          ip = VTOI(vp);
2674 2682  
2675 2683          /*
2676 2684           * Acquire the readers/write inode lock before locking
2677 2685           * any pages in this inode.
2678 2686           * The inode lock is held during i/o.
2679 2687           */
2680 2688          if (len == 0) {
2681 2689                  mutex_enter(&ip->i_tlock);
2682 2690                  ip->i_delayoff = ip->i_delaylen = 0;
2683 2691                  mutex_exit(&ip->i_tlock);
2684 2692          }
2685 2693  #ifdef  __lock_lint
2686 2694          rw_enter(&ip->i_contents, RW_READER);
2687 2695  #else
2688 2696          dolock = (rw_owner(&ip->i_contents) != curthread);
2689 2697          if (dolock) {
2690 2698                  rw_enter(&ip->i_contents, RW_READER);
2691 2699          }
2692 2700  #endif
2693 2701  
2694 2702          if (!vn_has_cached_data(vp)) {
2695 2703  #ifdef  __lock_lint
2696 2704                  rw_exit(&ip->i_contents);
2697 2705  #else
2698 2706                  if (dolock) {
2699 2707                          rw_exit(&ip->i_contents);
2700 2708                  }
2701 2709  #endif
2702 2710                  return (0);
2703 2711          }
2704 2712  
2705 2713          if (len == 0) {
2706 2714                  /*
2707 2715                   * Search the entire vp list for pages >= off.
2708 2716                   */
2709 2717                  err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2710 2718                      flags, cr);
2711 2719          } else {
2712 2720                  /*
2713 2721                   * Loop over all offsets in the range looking for
2714 2722                   * pages to deal with.
2715 2723                   */
2716 2724                  if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2717 2725                          eoff = MIN(off + len, eoff);
2718 2726                  } else {
2719 2727                          eoff = off + len;
2720 2728                  }
2721 2729  
2722 2730                  for (io_off = off; io_off < eoff; io_off += io_len) {
2723 2731                          /*
2724 2732                           * If we are not invalidating, synchronously
2725 2733                           * freeing or writing pages, use the routine
2726 2734                           * page_lookup_nowait() to prevent reclaiming
2727 2735                           * them from the free list.
2728 2736                           */
2729 2737                          if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2730 2738                                  pp = page_lookup(vp, io_off,
2731 2739                                      (flags & (B_INVAL | B_FREE)) ?
2732 2740                                      SE_EXCL : SE_SHARED);
2733 2741                          } else {
2734 2742                                  pp = page_lookup_nowait(vp, io_off,
2735 2743                                      (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2736 2744                          }
2737 2745  
2738 2746                          if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2739 2747                                  io_len = PAGESIZE;
2740 2748                          } else {
2741 2749  
2742 2750                                  err = ud_putapage(vp, pp,
2743 2751                                      &io_off, &io_len, flags, cr);
2744 2752                                  if (err != 0) {
2745 2753                                          break;
2746 2754                                  }
2747 2755                                  /*
2748 2756                                   * "io_off" and "io_len" are returned as
2749 2757                                   * the range of pages we actually wrote.
2750 2758                                   * This allows us to skip ahead more quickly
2751 2759                                   * since several pages may've been dealt
2752 2760                                   * with by this iteration of the loop.
2753 2761                                   */
2754 2762                          }
2755 2763                  }
2756 2764          }
2757 2765          if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2758 2766                  /*
2759 2767                   * We have just sync'ed back all the pages on
2760 2768                   * the inode, turn off the IMODTIME flag.
2761 2769                   */
2762 2770                  mutex_enter(&ip->i_tlock);
2763 2771                  ip->i_flag &= ~IMODTIME;
2764 2772                  mutex_exit(&ip->i_tlock);
2765 2773          }
2766 2774  #ifdef  __lock_lint
2767 2775          rw_exit(&ip->i_contents);
2768 2776  #else
2769 2777          if (dolock) {
2770 2778                  rw_exit(&ip->i_contents);
2771 2779          }
2772 2780  #endif
2773 2781          return (err);
2774 2782  }
2775 2783  
2776 2784  /* ARGSUSED */
2777 2785  int32_t
2778 2786  ud_putapage(struct vnode *vp,
2779 2787          page_t *pp, u_offset_t *offp,
2780 2788          size_t *lenp, int32_t flags, struct cred *cr)
2781 2789  {
2782 2790          daddr_t bn;
2783 2791          size_t io_len;
2784 2792          struct ud_inode *ip;
2785 2793          int32_t error = 0, contig, multi_io = 0;
2786 2794          struct udf_vfs *udf_vfsp;
2787 2795          u_offset_t off, io_off;
2788 2796          caddr_t kaddr, caddr;
2789 2797          struct buf *bp = NULL;
2790 2798          int32_t lbmask;
2791 2799          uint64_t isize;
2792 2800          uint16_t crc_len;
2793 2801          struct file_entry *fe;
2794 2802  
2795 2803          ud_printf("ud_putapage\n");
2796 2804  
2797 2805          ip = VTOI(vp);
2798 2806          ASSERT(ip);
2799 2807          ASSERT(RW_LOCK_HELD(&ip->i_contents));
2800 2808          lbmask = ip->i_udf->udf_lbmask;
2801 2809          isize = (ip->i_size + lbmask) & (~lbmask);
2802 2810  
2803 2811          udf_vfsp = ip->i_udf;
2804 2812          ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2805 2813  
2806 2814          /*
2807 2815           * If the modified time on the inode has not already been
2808 2816           * set elsewhere (e.g. for write/setattr) we set the time now.
2809 2817           * This gives us approximate modified times for mmap'ed files
2810 2818           * which are modified via stores in the user address space.
2811 2819           */
2812 2820          if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2813 2821                  mutex_enter(&ip->i_tlock);
2814 2822                  ip->i_flag |= IUPD;
2815 2823                  ITIMES_NOLOCK(ip);
2816 2824                  mutex_exit(&ip->i_tlock);
2817 2825          }
2818 2826  
2819 2827  
2820 2828          /*
2821 2829           * Align the request to a block boundry (for old file systems),
2822 2830           * and go ask bmap() how contiguous things are for this file.
2823 2831           */
2824 2832          off = pp->p_offset & ~(offset_t)lbmask;
2825 2833                                  /* block align it */
2826 2834  
2827 2835  
2828 2836          if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2829 2837                  ASSERT(ip->i_size <= ip->i_max_emb);
2830 2838  
2831 2839                  pp = pvn_write_kluster(vp, pp, &io_off,
2832 2840                      &io_len, off, PAGESIZE, flags);
2833 2841                  if (io_len == 0) {
2834 2842                          io_len = PAGESIZE;
2835 2843                  }
2836 2844  
2837 2845                  bp = ud_bread(ip->i_dev,
2838 2846                      ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2839 2847                      udf_vfsp->udf_lbsize);
2840 2848                  fe = (struct file_entry *)bp->b_un.b_addr;
2841 2849                  if ((bp->b_flags & B_ERROR) ||
2842 2850                      (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2843 2851                      ip->i_icb_block,
2844 2852                      1, udf_vfsp->udf_lbsize) != 0)) {
2845 2853                          if (pp != NULL)
2846 2854                                  pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2847 2855                          if (bp->b_flags & B_ERROR) {
2848 2856                                  error = EIO;
2849 2857                          } else {
2850 2858                                  error = EINVAL;
2851 2859                          }
2852 2860                          brelse(bp);
2853 2861                          return (error);
2854 2862                  }
2855 2863                  if ((bp->b_error == 0) &&
2856 2864                      (bp->b_resid == 0)) {
2857 2865  
2858 2866                          caddr = bp->b_un.b_addr + ip->i_data_off;
2859 2867                          kaddr = (caddr_t)ppmapin(pp,
2860 2868                              PROT_READ | PROT_WRITE, (caddr_t)-1);
2861 2869                          (void) kcopy(kaddr, caddr, ip->i_size);
2862 2870                          ppmapout(kaddr);
2863 2871                  }
2864 2872                  crc_len = offsetof(struct file_entry, fe_spec) +
2865 2873                      SWAP_32(fe->fe_len_ear);
2866 2874                  crc_len += ip->i_size;
2867 2875                  ud_make_tag(ip->i_udf, &fe->fe_tag,
2868 2876                      UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2869 2877  
2870 2878                  bwrite(bp);
2871 2879  
2872 2880                  if (flags & B_ASYNC) {
2873 2881                          pvn_write_done(pp, flags);
2874 2882                  }
2875 2883                  contig = ip->i_size;
2876 2884          } else {
2877 2885  
2878 2886                  if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2879 2887                          goto out;
2880 2888                  }
2881 2889                  contig = MIN(contig, PAGESIZE);
2882 2890                  contig = (contig + lbmask) & (~lbmask);
2883 2891  
2884 2892                  if (contig < PAGESIZE) {
2885 2893                          uint64_t count;
2886 2894  
2887 2895                          count = isize - off;
2888 2896                          if (contig != count) {
2889 2897                                  multi_io = 1;
2890 2898                                  contig = (int32_t)(MIN(count, PAGESIZE));
2891 2899                          }
2892 2900                  }
2893 2901  
2894 2902                  if ((off + contig) > isize) {
2895 2903                          contig = isize - off;
2896 2904                  }
2897 2905  
2898 2906                  if (contig > PAGESIZE) {
2899 2907                          if (contig & PAGEOFFSET) {
2900 2908                                  contig &= PAGEMASK;
2901 2909                          }
2902 2910                  }
2903 2911  
2904 2912                  pp = pvn_write_kluster(vp, pp, &io_off,
2905 2913                      &io_len, off, contig, flags);
2906 2914                  if (io_len == 0) {
2907 2915                          io_len = PAGESIZE;
2908 2916                  }
2909 2917  
2910 2918                  bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2911 2919                  ASSERT(bp != NULL);
2912 2920  
2913 2921                  bp->b_edev = ip->i_dev;
2914 2922                  bp->b_dev = cmpdev(ip->i_dev);
2915 2923                  bp->b_blkno = bn;
2916 2924                  bp->b_un.b_addr = 0;
2917 2925                  bp->b_file = vp;
2918 2926                  bp->b_offset = (offset_t)off;
2919 2927  
2920 2928  
2921 2929                  /*
2922 2930                   * write throttle
2923 2931                   */
2924 2932                  ASSERT(bp->b_iodone == NULL);
2925 2933                  bp->b_iodone = ud_iodone;
2926 2934                  mutex_enter(&ip->i_tlock);
2927 2935                  ip->i_writes += bp->b_bcount;
2928 2936                  mutex_exit(&ip->i_tlock);
2929 2937  
2930 2938                  if (multi_io == 0) {
2931 2939  
2932 2940                          (void) bdev_strategy(bp);
2933 2941                  } else {
2934 2942                          error = ud_multi_strat(ip, pp, bp, off);
2935 2943                          if (error != 0) {
2936 2944                                  goto out;
2937 2945                          }
2938 2946                  }
2939 2947  
2940 2948                  if ((flags & B_ASYNC) == 0) {
2941 2949                          /*
2942 2950                           * Wait for i/o to complete.
2943 2951                           */
2944 2952                          error = biowait(bp);
2945 2953                          pageio_done(bp);
2946 2954                  }
2947 2955          }
2948 2956  
2949 2957          if ((flags & B_ASYNC) == 0) {
2950 2958                  pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2951 2959          }
2952 2960  
2953 2961          pp = NULL;
2954 2962  
2955 2963  out:
2956 2964          if (error != 0 && pp != NULL) {
2957 2965                  pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2958 2966          }
2959 2967  
2960 2968          if (offp) {
2961 2969                  *offp = io_off;
2962 2970          }
2963 2971          if (lenp) {
2964 2972                  *lenp = io_len;
2965 2973          }
2966 2974  
2967 2975          return (error);
2968 2976  }
2969 2977  
2970 2978  
2971 2979  int32_t
2972 2980  ud_iodone(struct buf *bp)
2973 2981  {
2974 2982          struct ud_inode *ip;
2975 2983  
2976 2984          ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2977 2985  
2978 2986          bp->b_iodone = NULL;
2979 2987  
2980 2988          ip = VTOI(bp->b_pages->p_vnode);
2981 2989  
2982 2990          mutex_enter(&ip->i_tlock);
2983 2991          if (ip->i_writes >= ud_LW) {
2984 2992                  if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2985 2993                          if (ud_WRITES) {
2986 2994                                  cv_broadcast(&ip->i_wrcv); /* wake all up */
2987 2995                          }
2988 2996                  }
2989 2997          } else {
2990 2998                  ip->i_writes -= bp->b_bcount;
2991 2999          }
2992 3000          mutex_exit(&ip->i_tlock);
2993 3001          iodone(bp);
2994 3002          return (0);
2995 3003  }
2996 3004  
2997 3005  /* ARGSUSED3 */
2998 3006  int32_t
2999 3007  ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
3000 3008  {
3001 3009          struct vnode *vp;
3002 3010          struct udf_vfs *udf_vfsp;
3003 3011          krw_t rwtype;
3004 3012          caddr_t base;
3005 3013          uint32_t flags;
3006 3014          int32_t error, n, on, mapon, dofree;
3007 3015          u_offset_t off;
3008 3016          long oresid = uio->uio_resid;
3009 3017  
3010 3018          ASSERT(RW_LOCK_HELD(&ip->i_contents));
3011 3019          if ((ip->i_type != VREG) &&
3012 3020              (ip->i_type != VDIR) &&
3013 3021              (ip->i_type != VLNK)) {
3014 3022                  return (EIO);
3015 3023          }
3016 3024  
3017 3025          if (uio->uio_loffset > MAXOFFSET_T) {
3018 3026                  return (0);
3019 3027          }
3020 3028  
3021 3029          if ((uio->uio_loffset < (offset_t)0) ||
3022 3030              ((uio->uio_loffset + uio->uio_resid) < 0)) {
3023 3031                  return (EINVAL);
3024 3032          }
3025 3033          if (uio->uio_resid == 0) {
3026 3034                  return (0);
3027 3035          }
3028 3036  
3029 3037          vp = ITOV(ip);
3030 3038          udf_vfsp = ip->i_udf;
3031 3039          mutex_enter(&ip->i_tlock);
3032 3040          ip->i_flag |= IACC;
3033 3041          mutex_exit(&ip->i_tlock);
3034 3042  
3035 3043          rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3036 3044  
3037 3045          do {
3038 3046                  offset_t diff;
3039 3047                  u_offset_t uoff = uio->uio_loffset;
3040 3048                  off = uoff & (offset_t)MAXBMASK;
3041 3049                  mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3042 3050                  on = (int)blkoff(udf_vfsp, uoff);
3043 3051                  n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3044 3052  
3045 3053                  diff = ip->i_size - uoff;
3046 3054  
3047 3055                  if (diff <= (offset_t)0) {
3048 3056                          error = 0;
3049 3057                          goto out;
3050 3058                  }
3051 3059                  if (diff < (offset_t)n) {
3052 3060                          n = (int)diff;
3053 3061                  }
3054 3062                  dofree = ud_freebehind &&
3055 3063                      ip->i_nextr == (off & PAGEMASK) &&
3056 3064                      off > ud_smallfile;
3057 3065  
3058 3066  #ifndef __lock_lint
3059 3067                  if (rwtype == RW_READER) {
3060 3068                          rw_exit(&ip->i_contents);
3061 3069                  }
3062 3070  #endif
3063 3071  
3064 3072                  base = segmap_getmapflt(segkmap, vp, (off + mapon),
3065 3073                      (uint32_t)n, 1, S_READ);
3066 3074                  error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3067 3075  
3068 3076                  flags = 0;
3069 3077                  if (!error) {
3070 3078                          /*
3071 3079                           * If read a whole block, or read to eof,
3072 3080                           * won't need this buffer again soon.
3073 3081                           */
3074 3082                          if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3075 3083                              freemem < lotsfree + pages_before_pager) {
3076 3084                                  flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3077 3085                          }
3078 3086                          /*
3079 3087                           * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3080 3088                           * we want to make sure that the page which has
3081 3089                           * been read, is written on disk if it is dirty.
3082 3090                           * And corresponding indirect blocks should also
3083 3091                           * be flushed out.
3084 3092                           */
3085 3093                          if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3086 3094                                  flags &= ~SM_ASYNC;
3087 3095                                  flags |= SM_WRITE;
3088 3096                          }
3089 3097                          error = segmap_release(segkmap, base, flags);
3090 3098                  } else    {
3091 3099                          (void) segmap_release(segkmap, base, flags);
3092 3100                  }
3093 3101  
3094 3102  #ifndef __lock_lint
3095 3103                  if (rwtype == RW_READER) {
3096 3104                          rw_enter(&ip->i_contents, rwtype);
3097 3105                  }
3098 3106  #endif
3099 3107          } while (error == 0 && uio->uio_resid > 0 && n != 0);
3100 3108  out:
3101 3109          /*
3102 3110           * Inode is updated according to this table if FRSYNC is set.
3103 3111           *
3104 3112           *      FSYNC   FDSYNC(posix.4)
3105 3113           *      --------------------------
3106 3114           *      always  IATTCHG|IBDWRITE
3107 3115           */
3108 3116          if (ioflag & FRSYNC) {
3109 3117                  if ((ioflag & FSYNC) ||
3110 3118                      ((ioflag & FDSYNC) &&
3111 3119                      (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3112 3120                  rw_exit(&ip->i_contents);
3113 3121                  rw_enter(&ip->i_contents, RW_WRITER);
3114 3122                  ud_iupdat(ip, 1);
3115 3123                  }
3116 3124          }
3117 3125          /*
3118 3126           * If we've already done a partial read, terminate
3119 3127           * the read but return no error.
3120 3128           */
3121 3129          if (oresid != uio->uio_resid) {
3122 3130                  error = 0;
3123 3131          }
3124 3132          ITIMES(ip);
3125 3133  
3126 3134          return (error);
3127 3135  }
3128 3136  
3129 3137  int32_t
3130 3138  ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3131 3139  {
3132 3140          caddr_t base;
3133 3141          struct vnode *vp;
3134 3142          struct udf_vfs *udf_vfsp;
3135 3143          uint32_t flags;
3136 3144          int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3137 3145          int32_t pagecreate, newpage;
3138 3146          uint64_t old_i_size;
3139 3147          u_offset_t off;
3140 3148          long start_resid = uio->uio_resid, premove_resid;
3141 3149          rlim64_t limit = uio->uio_limit;
3142 3150  
3143 3151  
3144 3152          ASSERT(RW_WRITE_HELD(&ip->i_contents));
3145 3153          if ((ip->i_type != VREG) &&
3146 3154              (ip->i_type != VDIR) &&
3147 3155              (ip->i_type != VLNK)) {
3148 3156                  return (EIO);
3149 3157          }
3150 3158  
3151 3159          if (uio->uio_loffset >= MAXOFFSET_T) {
3152 3160                  return (EFBIG);
3153 3161          }
3154 3162          /*
3155 3163           * see udf_l_pathconf
3156 3164           */
3157 3165          if (limit > (((uint64_t)1 << 40) - 1)) {
3158 3166                  limit = ((uint64_t)1 << 40) - 1;
3159 3167          }
3160 3168          if (uio->uio_loffset >= limit) {
3161 3169                  proc_t *p = ttoproc(curthread);
3162 3170  
3163 3171                  mutex_enter(&p->p_lock);
3164 3172                  (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3165 3173                      p, RCA_UNSAFE_SIGINFO);
3166 3174                  mutex_exit(&p->p_lock);
3167 3175                  return (EFBIG);
3168 3176          }
3169 3177          if ((uio->uio_loffset < (offset_t)0) ||
3170 3178              ((uio->uio_loffset + uio->uio_resid) < 0)) {
3171 3179                  return (EINVAL);
3172 3180          }
3173 3181          if (uio->uio_resid == 0) {
3174 3182                  return (0);
3175 3183          }
3176 3184  
3177 3185          mutex_enter(&ip->i_tlock);
3178 3186          ip->i_flag |= INOACC;
3179 3187  
3180 3188          if (ioflag & (FSYNC | FDSYNC)) {
3181 3189                  ip->i_flag |= ISYNC;
3182 3190                  iupdat_flag = 1;
3183 3191          }
3184 3192          mutex_exit(&ip->i_tlock);
3185 3193  
3186 3194          udf_vfsp = ip->i_udf;
3187 3195          vp = ITOV(ip);
3188 3196  
3189 3197          do {
3190 3198                  u_offset_t uoff = uio->uio_loffset;
3191 3199                  off = uoff & (offset_t)MAXBMASK;
3192 3200                  mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3193 3201                  on = (int)blkoff(udf_vfsp, uoff);
3194 3202                  n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3195 3203  
3196 3204                  if (ip->i_type == VREG && uoff + n >= limit) {
3197 3205                          if (uoff >= limit) {
3198 3206                                  error = EFBIG;
3199 3207                                  goto out;
3200 3208                          }
3201 3209                          n = (int)(limit - (rlim64_t)uoff);
3202 3210                  }
3203 3211                  if (uoff + n > ip->i_size) {
3204 3212                          /*
3205 3213                           * We are extending the length of the file.
3206 3214                           * bmap is used so that we are sure that
3207 3215                           * if we need to allocate new blocks, that it
3208 3216                           * is done here before we up the file size.
3209 3217                           */
3210 3218                          error = ud_bmap_write(ip, uoff,
3211 3219                              (int)(on + n), mapon == 0, cr);
3212 3220                          if (error) {
3213 3221                                  break;
3214 3222                          }
3215 3223                          i_size_changed = 1;
3216 3224                          old_i_size = ip->i_size;
3217 3225                          ip->i_size = uoff + n;
3218 3226                          /*
3219 3227                           * If we are writing from the beginning of
3220 3228                           * the mapping, we can just create the
3221 3229                           * pages without having to read them.
3222 3230                           */
3223 3231                          pagecreate = (mapon == 0);
3224 3232                  } else if (n == MAXBSIZE) {
3225 3233                          /*
3226 3234                           * Going to do a whole mappings worth,
3227 3235                           * so we can just create the pages w/o
3228 3236                           * having to read them in.  But before
3229 3237                           * we do that, we need to make sure any
3230 3238                           * needed blocks are allocated first.
3231 3239                           */
3232 3240                          error = ud_bmap_write(ip, uoff,
3233 3241                              (int)(on + n), 1, cr);
3234 3242                          if (error) {
3235 3243                                  break;
3236 3244                          }
3237 3245                          pagecreate = 1;
3238 3246                  } else {
3239 3247                          pagecreate = 0;
3240 3248                  }
3241 3249  
3242 3250                  rw_exit(&ip->i_contents);
3243 3251  
3244 3252                  /*
3245 3253                   * Touch the page and fault it in if it is not in
3246 3254                   * core before segmap_getmapflt can lock it. This
3247 3255                   * is to avoid the deadlock if the buffer is mapped
3248 3256                   * to the same file through mmap which we want to
3249 3257                   * write to.
3250 3258                   */
3251 3259                  uio_prefaultpages((long)n, uio);
3252 3260  
3253 3261                  base = segmap_getmapflt(segkmap, vp, (off + mapon),
3254 3262                      (uint32_t)n, !pagecreate, S_WRITE);
3255 3263  
3256 3264                  /*
3257 3265                   * segmap_pagecreate() returns 1 if it calls
3258 3266                   * page_create_va() to allocate any pages.
3259 3267                   */
3260 3268                  newpage = 0;
3261 3269                  if (pagecreate) {
3262 3270                          newpage = segmap_pagecreate(segkmap, base,
3263 3271                              (size_t)n, 0);
3264 3272                  }
3265 3273  
3266 3274                  premove_resid = uio->uio_resid;
3267 3275                  error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3268 3276  
3269 3277                  if (pagecreate &&
3270 3278                      uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3271 3279                          /*
3272 3280                           * We created pages w/o initializing them completely,
3273 3281                           * thus we need to zero the part that wasn't set up.
3274 3282                           * This happens on most EOF write cases and if
3275 3283                           * we had some sort of error during the uiomove.
3276 3284                           */
3277 3285                          int nzero, nmoved;
3278 3286  
3279 3287                          nmoved = (int)(uio->uio_loffset - (off + mapon));
3280 3288                          ASSERT(nmoved >= 0 && nmoved <= n);
3281 3289                          nzero = roundup(on + n, PAGESIZE) - nmoved;
3282 3290                          ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3283 3291                          (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3284 3292                  }
3285 3293  
3286 3294                  /*
3287 3295                   * Unlock the pages allocated by page_create_va()
3288 3296                   * in segmap_pagecreate()
3289 3297                   */
3290 3298                  if (newpage) {
3291 3299                          segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3292 3300                  }
3293 3301  
3294 3302                  if (error) {
3295 3303                          /*
3296 3304                           * If we failed on a write, we may have already
3297 3305                           * allocated file blocks as well as pages.  It's
3298 3306                           * hard to undo the block allocation, but we must
3299 3307                           * be sure to invalidate any pages that may have
3300 3308                           * been allocated.
3301 3309                           */
3302 3310                          (void) segmap_release(segkmap, base, SM_INVAL);
3303 3311                  } else {
3304 3312                          flags = 0;
3305 3313                          /*
3306 3314                           * Force write back for synchronous write cases.
3307 3315                           */
3308 3316                          if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3309 3317                                  /*
3310 3318                                   * If the sticky bit is set but the
3311 3319                                   * execute bit is not set, we do a
3312 3320                                   * synchronous write back and free
3313 3321                                   * the page when done.  We set up swap
3314 3322                                   * files to be handled this way to
3315 3323                                   * prevent servers from keeping around
3316 3324                                   * the client's swap pages too long.
3317 3325                                   * XXX - there ought to be a better way.
3318 3326                                   */
3319 3327                                  if (IS_SWAPVP(vp)) {
3320 3328                                          flags = SM_WRITE | SM_FREE |
3321 3329                                              SM_DONTNEED;
3322 3330                                          iupdat_flag = 0;
3323 3331                                  } else {
3324 3332                                          flags = SM_WRITE;
3325 3333                                  }
3326 3334                          } else if (((mapon + n) == MAXBSIZE) ||
3327 3335                              IS_SWAPVP(vp)) {
3328 3336                                  /*
3329 3337                                   * Have written a whole block.
3330 3338                                   * Start an asynchronous write and
3331 3339                                   * mark the buffer to indicate that
3332 3340                                   * it won't be needed again soon.
3333 3341                                   */
3334 3342                                  flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3335 3343                          }
3336 3344                          error = segmap_release(segkmap, base, flags);
3337 3345  
3338 3346                          /*
3339 3347                           * If the operation failed and is synchronous,
3340 3348                           * then we need to unwind what uiomove() last
3341 3349                           * did so we can potentially return an error to
3342 3350                           * the caller.  If this write operation was
3343 3351                           * done in two pieces and the first succeeded,
3344 3352                           * then we won't return an error for the second
3345 3353                           * piece that failed.  However, we only want to
3346 3354                           * return a resid value that reflects what was
3347 3355                           * really done.
3348 3356                           *
3349 3357                           * Failures for non-synchronous operations can
3350 3358                           * be ignored since the page subsystem will
3351 3359                           * retry the operation until it succeeds or the
3352 3360                           * file system is unmounted.
3353 3361                           */
3354 3362                          if (error) {
3355 3363                                  if ((ioflag & (FSYNC | FDSYNC)) ||
3356 3364                                      ip->i_type == VDIR) {
3357 3365                                          uio->uio_resid = premove_resid;
3358 3366                                  } else {
3359 3367                                          error = 0;
3360 3368                                  }
3361 3369                          }
3362 3370                  }
3363 3371  
3364 3372                  /*
3365 3373                   * Re-acquire contents lock.
3366 3374                   */
3367 3375                  rw_enter(&ip->i_contents, RW_WRITER);
3368 3376                  /*
3369 3377                   * If the uiomove() failed or if a synchronous
3370 3378                   * page push failed, fix up i_size.
3371 3379                   */
3372 3380                  if (error) {
3373 3381                          if (i_size_changed) {
3374 3382                                  /*
3375 3383                                   * The uiomove failed, and we
3376 3384                                   * allocated blocks,so get rid
3377 3385                                   * of them.
3378 3386                                   */
3379 3387                                  (void) ud_itrunc(ip, old_i_size, 0, cr);
3380 3388                          }
3381 3389                  } else {
3382 3390                          /*
3383 3391                           * XXX - Can this be out of the loop?
3384 3392                           */
3385 3393                          ip->i_flag |= IUPD | ICHG;
3386 3394                          if (i_size_changed) {
3387 3395                                  ip->i_flag |= IATTCHG;
3388 3396                          }
3389 3397                          if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3390 3398                              (IEXEC >> 10))) != 0 &&
3391 3399                              (ip->i_char & (ISUID | ISGID)) != 0 &&
3392 3400                              secpolicy_vnode_setid_retain(cr,
3393 3401                              (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3394 3402                                  /*
3395 3403                                   * Clear Set-UID & Set-GID bits on
3396 3404                                   * successful write if not privileged
3397 3405                                   * and at least one of the execute bits
3398 3406                                   * is set.  If we always clear Set-GID,
3399 3407                                   * mandatory file and record locking is
3400 3408                                   * unuseable.
3401 3409                                   */
3402 3410                                  ip->i_char &= ~(ISUID | ISGID);
3403 3411                          }
3404 3412                  }
3405 3413          } while (error == 0 && uio->uio_resid > 0 && n != 0);
3406 3414  
3407 3415  out:
3408 3416          /*
3409 3417           * Inode is updated according to this table -
3410 3418           *
3411 3419           *      FSYNC   FDSYNC(posix.4)
3412 3420           *      --------------------------
3413 3421           *      always@ IATTCHG|IBDWRITE
3414 3422           *
3415 3423           * @ -  If we are doing synchronous write the only time we should
3416 3424           *      not be sync'ing the ip here is if we have the stickyhack
3417 3425           *      activated, the file is marked with the sticky bit and
3418 3426           *      no exec bit, the file length has not been changed and
3419 3427           *      no new blocks have been allocated during this write.
3420 3428           */
3421 3429          if ((ip->i_flag & ISYNC) != 0) {
3422 3430                  /*
3423 3431                   * we have eliminated nosync
3424 3432                   */
3425 3433                  if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3426 3434                      ((ioflag & FSYNC) && iupdat_flag)) {
3427 3435                          ud_iupdat(ip, 1);
3428 3436                  }
3429 3437          }
3430 3438  
3431 3439          /*
3432 3440           * If we've already done a partial-write, terminate
3433 3441           * the write but return no error.
3434 3442           */
3435 3443          if (start_resid != uio->uio_resid) {
3436 3444                  error = 0;
3437 3445          }
3438 3446          ip->i_flag &= ~(INOACC | ISYNC);
3439 3447          ITIMES_NOLOCK(ip);
3440 3448  
3441 3449          return (error);
3442 3450  }
3443 3451  
3444 3452  int32_t
3445 3453  ud_multi_strat(struct ud_inode *ip,
3446 3454          page_t *pp, struct buf *bp, u_offset_t start)
3447 3455  {
3448 3456          daddr_t bn;
3449 3457          int32_t error = 0, io_count, contig, alloc_sz, i;
3450 3458          uint32_t io_off;
3451 3459          mio_master_t *mm = NULL;
3452 3460          mio_slave_t *ms = NULL;
3453 3461          struct buf *rbp;
3454 3462  
3455 3463          ASSERT(!(start & PAGEOFFSET));
3456 3464  
3457 3465          /*
3458 3466           * Figure out how many buffers to allocate
3459 3467           */
3460 3468          io_count = 0;
3461 3469          for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3462 3470                  contig = 0;
3463 3471                  if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3464 3472                      &bn, &contig)) {
3465 3473                          goto end;
3466 3474                  }
3467 3475                  if (contig == 0) {
3468 3476                          goto end;
3469 3477                  }
3470 3478                  contig = MIN(contig, PAGESIZE - io_off);
3471 3479                  if (bn != UDF_HOLE) {
3472 3480                          io_count ++;
3473 3481                  } else {
3474 3482                          /*
3475 3483                           * HOLE
3476 3484                           */
3477 3485                          if (bp->b_flags & B_READ) {
3478 3486  
3479 3487                                  /*
3480 3488                                   * This is a hole and is read
3481 3489                                   * it should be filled with 0's
3482 3490                                   */
3483 3491                                  pagezero(pp, io_off, contig);
3484 3492                          }
3485 3493                  }
3486 3494          }
3487 3495  
3488 3496  
3489 3497          if (io_count != 0) {
3490 3498  
3491 3499                  /*
3492 3500                   * Allocate memory for all the
3493 3501                   * required number of buffers
3494 3502                   */
3495 3503                  alloc_sz = sizeof (mio_master_t) +
3496 3504                      (sizeof (mio_slave_t) * io_count);
3497 3505                  mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3498 3506                  if (mm == NULL) {
3499 3507                          error = ENOMEM;
3500 3508                          goto end;
3501 3509                  }
3502 3510  
3503 3511                  /*
3504 3512                   * initialize master
3505 3513                   */
3506 3514                  mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3507 3515                  mm->mm_size = alloc_sz;
3508 3516                  mm->mm_bp = bp;
3509 3517                  mm->mm_resid = 0;
3510 3518                  mm->mm_error = 0;
3511 3519                  mm->mm_index = master_index++;
3512 3520  
3513 3521                  ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3514 3522  
3515 3523                  /*
3516 3524                   * Initialize buffers
3517 3525                   */
3518 3526                  io_count = 0;
3519 3527                  for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3520 3528                          contig = 0;
3521 3529                          if (error = ud_bmap_read(ip,
3522 3530                              (u_offset_t)(start + io_off),
3523 3531                              &bn, &contig)) {
3524 3532                                  goto end;
3525 3533                          }
3526 3534                          ASSERT(contig);
3527 3535                          if ((io_off + contig) > bp->b_bcount) {
3528 3536                                  contig = bp->b_bcount - io_off;
3529 3537                          }
3530 3538                          if (bn != UDF_HOLE) {
3531 3539                                  /*
3532 3540                                   * Clone the buffer
3533 3541                                   * and prepare to start I/O
3534 3542                                   */
3535 3543                                  ms->ms_ptr = mm;
3536 3544                                  bioinit(&ms->ms_buf);
3537 3545                                  rbp = bioclone(bp, io_off, (size_t)contig,
3538 3546                                      bp->b_edev, bn, ud_slave_done,
3539 3547                                      &ms->ms_buf, KM_NOSLEEP);
3540 3548                                  ASSERT(rbp == &ms->ms_buf);
3541 3549                                  mm->mm_resid += contig;
3542 3550                                  io_count++;
3543 3551                                  ms ++;
3544 3552                          }
3545 3553                  }
3546 3554  
3547 3555                  /*
3548 3556                   * Start I/O's
3549 3557                   */
3550 3558                  ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3551 3559                  for (i = 0; i < io_count; i++) {
3552 3560                          (void) bdev_strategy(&ms->ms_buf);
3553 3561                          ms ++;
3554 3562                  }
3555 3563          }
3556 3564  
3557 3565  end:
3558 3566          if (error != 0) {
3559 3567                  bp->b_flags |= B_ERROR;
3560 3568                  bp->b_error = error;
3561 3569                  if (mm != NULL) {
3562 3570                          mutex_destroy(&mm->mm_mutex);
3563 3571                          kmem_free(mm, mm->mm_size);
3564 3572                  }
3565 3573          }
3566 3574          return (error);
3567 3575  }
3568 3576  
3569 3577  int32_t
3570 3578  ud_slave_done(struct buf *bp)
3571 3579  {
3572 3580          mio_master_t *mm;
3573 3581          int32_t resid;
3574 3582  
3575 3583          ASSERT(SEMA_HELD(&bp->b_sem));
3576 3584          ASSERT((bp->b_flags & B_DONE) == 0);
3577 3585  
3578 3586          mm = ((mio_slave_t *)bp)->ms_ptr;
3579 3587  
3580 3588          /*
3581 3589           * Propagate error and byte count info from slave struct to
3582 3590           * the master struct
3583 3591           */
3584 3592          mutex_enter(&mm->mm_mutex);
3585 3593          if (bp->b_flags & B_ERROR) {
3586 3594  
3587 3595                  /*
3588 3596                   * If multiple slave buffers get
3589 3597                   * error we forget the old errors
3590 3598                   * this is ok because we any way
3591 3599                   * cannot return multiple errors
3592 3600                   */
3593 3601                  mm->mm_error = bp->b_error;
3594 3602          }
3595 3603          mm->mm_resid -= bp->b_bcount;
3596 3604          resid = mm->mm_resid;
3597 3605          mutex_exit(&mm->mm_mutex);
3598 3606  
3599 3607          /*
3600 3608           * free up the resources allocated to cloned buffers.
3601 3609           */
3602 3610          bp_mapout(bp);
3603 3611          biofini(bp);
3604 3612  
3605 3613          if (resid == 0) {
3606 3614  
3607 3615                  /*
3608 3616                   * This is the last I/O operation
3609 3617                   * clean up and return the original buffer
3610 3618                   */
3611 3619                  if (mm->mm_error) {
3612 3620                          mm->mm_bp->b_flags |= B_ERROR;
3613 3621                          mm->mm_bp->b_error = mm->mm_error;
3614 3622                  }
3615 3623                  biodone(mm->mm_bp);
3616 3624                  mutex_destroy(&mm->mm_mutex);
3617 3625                  kmem_free(mm, mm->mm_size);
3618 3626          }
3619 3627          return (0);
3620 3628  }

↓ open down ↓

1957 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX