1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2015, Joyent, Inc.
28 */
29
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <sys/signal.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vfs_opreg.h>
43 #include <sys/stat.h>
44 #include <sys/vnode.h>
45 #include <sys/mode.h>
46 #include <sys/proc.h>
47 #include <sys/disp.h>
48 #include <sys/file.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/kmem.h>
52 #include <sys/uio.h>
53 #include <sys/dnlc.h>
54 #include <sys/conf.h>
55 #include <sys/errno.h>
56 #include <sys/mman.h>
57 #include <sys/fbuf.h>
58 #include <sys/pathname.h>
59 #include <sys/debug.h>
60 #include <sys/vmsystm.h>
61 #include <sys/cmn_err.h>
62 #include <sys/dirent.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/statvfs.h>
66 #include <sys/mount.h>
67 #include <sys/sunddi.h>
68 #include <sys/bootconf.h>
69 #include <sys/policy.h>
70
71 #include <vm/hat.h>
72 #include <vm/page.h>
73 #include <vm/pvn.h>
74 #include <vm/as.h>
75 #include <vm/seg.h>
76 #include <vm/seg_map.h>
77 #include <vm/seg_kmem.h>
78 #include <vm/seg_vn.h>
79 #include <vm/rm.h>
80 #include <vm/page.h>
81 #include <sys/swap.h>
82
83 #include <fs/fs_subr.h>
84
85 #include <sys/fs/udf_volume.h>
86 #include <sys/fs/udf_inode.h>
87
/*
 * Prototypes for the static functions implementing the udf vnode
 * operations; they are wired into udf_vnodeops_template[] below.
 */
static int32_t udf_open(struct vnode **,
	int32_t, struct cred *, caller_context_t *);
static int32_t udf_close(struct vnode *,
	int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
static int32_t udf_read(struct vnode *,
	struct uio *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_write(struct vnode *,
	struct uio *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_ioctl(struct vnode *,
	int32_t, intptr_t, int32_t, struct cred *, int32_t *,
	caller_context_t *);
static int32_t udf_getattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_setattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_access(struct vnode *,
	int32_t, int32_t, struct cred *, caller_context_t *);
static int32_t udf_lookup(struct vnode *,
	char *, struct vnode **, struct pathname *,
	int32_t, struct vnode *, struct cred *,
	caller_context_t *, int *, pathname_t *);
static int32_t udf_create(struct vnode *,
	char *, struct vattr *, enum vcexcl,
	int32_t, struct vnode **, struct cred *, int32_t,
	caller_context_t *, vsecattr_t *);
static int32_t udf_remove(struct vnode *,
	char *, struct cred *, caller_context_t *, int);
static int32_t udf_link(struct vnode *,
	struct vnode *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_rename(struct vnode *,
	char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_mkdir(struct vnode *,
	char *, struct vattr *, struct vnode **, struct cred *,
	caller_context_t *, int, vsecattr_t *);
static int32_t udf_rmdir(struct vnode *,
	char *, struct vnode *, struct cred *, caller_context_t *, int);
static int32_t udf_readdir(struct vnode *,
	struct uio *, struct cred *, int32_t *, caller_context_t *, int);
static int32_t udf_symlink(struct vnode *,
	char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_readlink(struct vnode *,
	struct uio *, struct cred *, caller_context_t *);
static int32_t udf_fsync(struct vnode *,
	int32_t, struct cred *, caller_context_t *);
static void udf_inactive(struct vnode *,
	struct cred *, caller_context_t *);
static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
	caller_context_t *);
static int32_t udf_frlock(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
	caller_context_t *);
static int32_t udf_space(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
static int32_t udf_getpage(struct vnode *, offset_t,
	size_t, uint32_t *, struct page **, size_t,
	struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
static int32_t udf_putpage(struct vnode *, offset_t,
	size_t, int32_t, struct cred *, caller_context_t *);
static int32_t udf_map(struct vnode *, offset_t, struct as *,
	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_l_pathconf(struct vnode *, int32_t,
	ulong_t *, struct cred *, caller_context_t *);
static int32_t udf_pageio(struct vnode *, struct page *,
	u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);

/*
 * Non-static helpers shared with the rest of the udf module
 * (paging, read/write internals and multi-buffer strategy IO).
 */
int32_t ud_getpage_miss(struct vnode *, u_offset_t,
	size_t, struct seg *, caddr_t, page_t *pl[],
	size_t, enum seg_rw, int32_t);
void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
int32_t ud_page_fill(struct ud_inode *, page_t *,
	u_offset_t, uint32_t, u_offset_t *);
int32_t ud_iodone(struct buf *);
int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
int32_t ud_slave_done(struct buf *);
175
176 /*
177 * Structures to control multiple IO operations to get or put pages
178 * that are backed by discontiguous blocks. The master struct is
179 * a dummy that holds the original bp from pageio_setup. The
180 * slave struct holds the working bp's to do the actual IO. Once
181 * all the slave IOs complete. The master is processed as if a single
182 * IO op has completed.
183 */
uint32_t master_index = 0;	/* next mm_index to hand out (debug aid) */
typedef struct mio_master {
	kmutex_t mm_mutex;	/* protect the fields below */
	int32_t mm_size;	/* NOTE(review): presumably total bytes of */
				/* the original request — confirm in */
				/* ud_multi_strat() */
	buf_t *mm_bp;		/* original bp */
	int32_t mm_resid;	/* bytes remaining to transfer */
	int32_t mm_error;	/* accumulated error from slaves */
	int32_t mm_index;	/* XXX debugging */
} mio_master_t;

typedef struct mio_slave {
	buf_t ms_buf;		/* working buffer for this IO chunk */
	mio_master_t *ms_ptr;	/* pointer to master */
} mio_slave_t;
198
/* The udf vnode-operations vector; initialized elsewhere from the template. */
struct vnodeops *udf_vnodeops;

/*
 * Template mapping each VOP name to its udf implementation above.
 * The list is terminated by the NULL/NULL sentinel entry.
 */
const fs_operation_def_t udf_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = udf_open },
	VOPNAME_CLOSE,		{ .vop_close = udf_close },
	VOPNAME_READ,		{ .vop_read = udf_read },
	VOPNAME_WRITE,		{ .vop_write = udf_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
	VOPNAME_ACCESS,		{ .vop_access = udf_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
	VOPNAME_CREATE,		{ .vop_create = udf_create },
	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
	VOPNAME_LINK,		{ .vop_link = udf_link },
	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
	VOPNAME_FID,		{ .vop_fid = udf_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
	VOPNAME_SPACE,		{ .vop_space = udf_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
	VOPNAME_MAP,		{ .vop_map = udf_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};
238
239 /* ARGSUSED */
240 static int32_t
241 udf_open(
242 struct vnode **vpp,
243 int32_t flag,
244 struct cred *cr,
245 caller_context_t *ct)
246 {
247 ud_printf("udf_open\n");
248
249 return (0);
250 }
251
252 /* ARGSUSED */
253 static int32_t
254 udf_close(
255 struct vnode *vp,
256 int32_t flag,
257 int32_t count,
258 offset_t offset,
259 struct cred *cr,
260 caller_context_t *ct)
261 {
262 struct ud_inode *ip = VTOI(vp);
263
264 ud_printf("udf_close\n");
265
266 ITIMES(ip);
267
268 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
269 cleanshares(vp, ttoproc(curthread)->p_pid);
270
271 /*
272 * Push partially filled cluster at last close.
273 * ``last close'' is approximated because the dnlc
274 * may have a hold on the vnode.
275 */
276 if (vp->v_count <= 2 && vp->v_type != VBAD) {
277 struct ud_inode *ip = VTOI(vp);
278 if (ip->i_delaylen) {
279 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
280 B_ASYNC | B_FREE, cr);
281 ip->i_delaylen = 0;
282 }
283 }
284
285 return (0);
286 }
287
/* ARGSUSED */
/*
 * VOP_READ for udf. The VFS layer acquires i_rwlock as reader via
 * VOP_RWLOCK before calling here (asserted below); the __lock_lint
 * stanzas only teach the static lock checker about that convention
 * and are not compiled into the real kernel. Mandatory-locking
 * files are checked with chklock() before reading via ud_rdip().
 */
static int32_t
udf_read(
	struct vnode *vp,
	struct uio *uiop,
	int32_t ioflag,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_read\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_READER);
#endif

	ASSERT(RW_READ_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * udf_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}

	/* i_contents serializes against writers/truncation during the read */
	rw_enter(&ip->i_contents, RW_READER);
	error = ud_rdip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}
330
331
/*
 * Write-throttling tunables. When ud_WRITES is nonzero, udf_write()
 * blocks a writer whose file has more than ud_HW bytes of outstanding
 * writes (i_writes) until woken on i_wrcv; ud_throttles counts how
 * often that happened. NOTE(review): ud_LW is presumably the low-water
 * mark at which waiters are signalled — confirm in ud_wrip()/iodone.
 */
int32_t ud_WRITES = 1;
int32_t ud_HW = 96 * 1024;	/* high water mark */
int32_t ud_LW = 64 * 1024;	/* low water mark */
int32_t ud_throttles = 0;
336
/* ARGSUSED */
/*
 * VOP_WRITE for udf. The VFS layer acquires i_rwlock as writer
 * before calling here (asserted below); __lock_lint stanzas exist
 * only for the static lock checker. After a mandatory-locking
 * check, writers are throttled against the per-file high-water
 * mark, then the write is done under i_contents via ud_wrip().
 */
static int32_t
udf_write(
	struct vnode *vp,
	struct uio *uiop,
	int32_t ioflag,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error = 0;

	ud_printf("udf_write\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_WRITER);
#endif

	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * ud_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}
	/*
	 * Throttle writes.
	 */
	mutex_enter(&ip->i_tlock);
	if (ud_WRITES && (ip->i_writes > ud_HW)) {
		while (ip->i_writes > ud_HW) {
			ud_throttles++;
			/* sleep until outstanding writes drain */
			cv_wait(&ip->i_wrcv, &ip->i_tlock);
		}
	}
	mutex_exit(&ip->i_tlock);

	/*
	 * Write to the file
	 */
	rw_enter(&ip->i_contents, RW_WRITER);
	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = ip->i_size;
	}
	error = ud_wrip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}
399
400 /* ARGSUSED */
401 static int32_t
402 udf_ioctl(
403 struct vnode *vp,
404 int32_t cmd,
405 intptr_t arg,
406 int32_t flag,
407 struct cred *cr,
408 int32_t *rvalp,
409 caller_context_t *ct)
410 {
411 return (ENOTTY);
412 }
413
414 /* ARGSUSED */
415 static int32_t
416 udf_getattr(
417 struct vnode *vp,
418 struct vattr *vap,
419 int32_t flags,
420 struct cred *cr,
421 caller_context_t *ct)
422 {
423 struct ud_inode *ip = VTOI(vp);
424
425 ud_printf("udf_getattr\n");
426
427 if (vap->va_mask == AT_SIZE) {
428 /*
429 * for performance, if only the size is requested don't bother
430 * with anything else.
431 */
432 vap->va_size = ip->i_size;
433 return (0);
434 }
435
436 rw_enter(&ip->i_contents, RW_READER);
437
438 vap->va_type = vp->v_type;
439 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
440
441 vap->va_uid = ip->i_uid;
442 vap->va_gid = ip->i_gid;
443 vap->va_fsid = ip->i_dev;
444 vap->va_nodeid = ip->i_icb_lbano;
445 vap->va_nlink = ip->i_nlink;
446 vap->va_size = ip->i_size;
447 vap->va_seq = ip->i_seq;
448 if (vp->v_type == VCHR || vp->v_type == VBLK) {
449 vap->va_rdev = ip->i_rdev;
450 } else {
451 vap->va_rdev = 0;
452 }
453
454 mutex_enter(&ip->i_tlock);
455 ITIMES_NOLOCK(ip); /* mark correct time in inode */
456 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
457 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
458 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
459 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
460 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
461 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
462 mutex_exit(&ip->i_tlock);
463
464 switch (ip->i_type) {
465 case VBLK:
466 vap->va_blksize = MAXBSIZE;
467 break;
468 case VCHR:
469 vap->va_blksize = MAXBSIZE;
470 break;
471 default:
472 vap->va_blksize = ip->i_udf->udf_lbsize;
473 break;
474 }
475 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
476
477 rw_exit(&ip->i_contents);
478
479 return (0);
480 }
481
/*
 * Adapter for secpolicy_vnode_setattr(): bridges its generic
 * (void *, mode, cred) access callback to udf's ud_iaccess(),
 * translating the vnode-style mode bits to on-disk permissions.
 */
static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	struct ud_inode *inode = ip;

	return (ud_iaccess(inode, UD_UPERM2DPERM(mode), cr, 0));
}
487
/*ARGSUSED4*/
/*
 * VOP_SETATTR for udf: change mode, owner/group, size and/or times
 * of the file, as selected by vap->va_mask. Permission checking is
 * delegated to secpolicy_vnode_setattr() with ud_iaccess_vmode()
 * as the access callback. Both i_rwlock and i_contents are held
 * for writing across the whole update.
 */
static int32_t
udf_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	int32_t error = 0;
	uint32_t mask = vap->va_mask;
	struct ud_inode *ip;
	timestruc_t now;
	struct vattr ovap;

	ud_printf("udf_setattr\n");

	ip = VTOI(vp);

	/*
	 * no updates allowed to 4096 files
	 */
	if (ip->i_astrat == STRAT_TYPE4096) {
		return (EINVAL);
	}

	/*
	 * Cannot set these attributes
	 */
	if (mask & AT_NOSET) {
		return (EINVAL);
	}

	rw_enter(&ip->i_rwlock, RW_WRITER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ovap.va_uid = ip->i_uid;
	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
	    ud_iaccess_vmode, ip);
	if (error)
		goto update_inode;

	/* secpolicy_vnode_setattr() may have altered va_mask; re-read it */
	mask = vap->va_mask;
	/*
	 * Change file access modes.
	 */
	if (mask & AT_MODE) {
		ip->i_perm = VA2UD_PERM(vap->va_mode);
		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	if (mask & (AT_UID|AT_GID)) {
		if (mask & AT_UID) {
			ip->i_uid = vap->va_uid;
		}
		if (mask & AT_GID) {
			ip->i_gid = vap->va_gid;
		}
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto update_inode;
		}
		if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
			goto update_inode;
		}
		if (vap->va_size > MAXOFFSET_T) {
			error = EFBIG;
			goto update_inode;
		}
		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
			goto update_inode;
		}

		/* Fire the appropriate fem/vnevent for the size change */
		if (vap->va_size == 0) {
			vnevent_truncate(vp, ct);
		} else {
			vnevent_resize(vp, ct);
		}
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		mutex_enter(&ip->i_tlock);
		if (mask & AT_ATIME) {
			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
			ip->i_flag &= ~IACC;
		}
		if (mask & AT_MTIME) {
			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
			/* an explicit mtime change still updates ctime */
			gethrestime(&now);
			ip->i_ctime.tv_sec = now.tv_sec;
			ip->i_ctime.tv_nsec = now.tv_nsec;
			ip->i_flag &= ~(IUPD|ICHG);
			ip->i_flag |= IMODTIME;
		}
		ip->i_flag |= IMOD;
		mutex_exit(&ip->i_tlock);
	}

update_inode:
	/*
	 * NOTE(review): T_DONTPEND presumably means the caller needs the
	 * inode written synchronously — confirm against ud_iupdat().
	 */
	if (curthread->t_flag & T_DONTPEND) {
		ud_iupdat(ip, 1);
	} else {
		ITIMES_NOLOCK(ip);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_rwlock);

	return (error);
}
612
613 /* ARGSUSED */
614 static int32_t
615 udf_access(
616 struct vnode *vp,
617 int32_t mode,
618 int32_t flags,
619 struct cred *cr,
620 caller_context_t *ct)
621 {
622 struct ud_inode *ip = VTOI(vp);
623
624 ud_printf("udf_access\n");
625
626 if (ip->i_udf == NULL) {
627 return (EIO);
628 }
629
630 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
631 }
632
633 int32_t udfs_stickyhack = 1;
634
635 /* ARGSUSED */
636 static int32_t
637 udf_lookup(
638 struct vnode *dvp,
639 char *nm,
640 struct vnode **vpp,
641 struct pathname *pnp,
642 int32_t flags,
643 struct vnode *rdir,
644 struct cred *cr,
645 caller_context_t *ct,
646 int *direntflags,
647 pathname_t *realpnp)
648 {
649 int32_t error;
650 struct vnode *vp;
651 struct ud_inode *ip, *xip;
652
653 ud_printf("udf_lookup\n");
654 /*
655 * Null component name is a synonym for directory being searched.
656 */
657 if (*nm == '\0') {
658 VN_HOLD(dvp);
659 *vpp = dvp;
660 error = 0;
661 goto out;
662 }
663
664 /*
665 * Fast path: Check the directory name lookup cache.
666 */
667 ip = VTOI(dvp);
668 if (vp = dnlc_lookup(dvp, nm)) {
669 /*
670 * Check accessibility of directory.
671 */
672 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
673 VN_RELE(vp);
674 }
675 xip = VTOI(vp);
676 } else {
677 error = ud_dirlook(ip, nm, &xip, cr, 1);
678 ITIMES(ip);
679 }
680
681 if (error == 0) {
682 ip = xip;
683 *vpp = ITOV(ip);
684 if ((ip->i_type != VDIR) &&
685 (ip->i_char & ISVTX) &&
686 ((ip->i_perm & IEXEC) == 0) &&
687 udfs_stickyhack) {
688 mutex_enter(&(*vpp)->v_lock);
689 (*vpp)->v_flag |= VISSWAP;
690 mutex_exit(&(*vpp)->v_lock);
691 }
692 ITIMES(ip);
693 /*
694 * If vnode is a device return special vnode instead.
695 */
696 if (IS_DEVVP(*vpp)) {
697 struct vnode *newvp;
698 newvp = specvp(*vpp, (*vpp)->v_rdev,
699 (*vpp)->v_type, cr);
700 VN_RELE(*vpp);
701 if (newvp == NULL) {
702 error = ENOSYS;
703 } else {
704 *vpp = newvp;
705 }
706 }
707 }
708 out:
709 return (error);
710 }
711
/* ARGSUSED */
/*
 * VOP_CREATE for udf: create `name' in directory `dvp' with the
 * attributes in *vap, returning a held vnode in *vpp. An existing
 * file is handled per the non-exclusive/exclusive rules below and
 * may be truncated if AT_SIZE 0 was requested. Device nodes are
 * wrapped in a specfs vnode before return.
 */
static int32_t
udf_create(
	struct vnode *dvp,
	char *name,
	struct vattr *vap,
	enum vcexcl excl,
	int32_t mode,
	struct vnode **vpp,
	struct cred *cr,
	int32_t flag,
	caller_context_t *ct,
	vsecattr_t *vsecp)
{
	int32_t error;
	struct ud_inode *ip = VTOI(dvp), *xip;

	ud_printf("udf_create\n");

	/* only privileged callers may create set-VSVTX (sticky) files */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		ITIMES(ip);
		error = EEXIST;
	} else {
		xip = NULL;
		rw_enter(&ip->i_rwlock, RW_WRITER);
		error = ud_direnter(ip, name, DE_CREATE,
		    (struct ud_inode *)0, (struct ud_inode *)0,
		    vap, &xip, cr, ct);
		rw_exit(&ip->i_rwlock);
		ITIMES(ip);
		/* from here on, ip is the created/existing file, not dvp */
		ip = xip;
	}
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#else
	if (ip != NULL) {
		rw_enter(&ip->i_contents, RW_WRITER);
	}
#endif

	/*
	 * If the file already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directories.
	 * Read-only create of an existing directory is also allowed.
	 * We fail an exclusive create of anything which already exists.
	 */
	if (error == EEXIST) {
		if (excl == NONEXCL) {
			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
				error = EISDIR;
			} else if (mode) {
				error = ud_iaccess(ip,
				    UD_UPERM2DPERM(mode), cr, 0);
			} else {
				error = 0;
			}
		}
		if (error) {
			rw_exit(&ip->i_contents);
			VN_RELE(ITOV(ip));
			goto out;
		} else if ((ip->i_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
			/*
			 * Truncate regular files, if requested by caller.
			 * Grab i_rwlock to make sure no one else is
			 * currently writing to the file (we promised
			 * bmap we would do this).
			 * Must get the locks in the correct order.
			 */
			if (ip->i_size == 0) {
				ip->i_flag |= ICHG | IUPD;
			} else {
				/* drop/retake to respect lock order */
				rw_exit(&ip->i_contents);
				rw_enter(&ip->i_rwlock, RW_WRITER);
				rw_enter(&ip->i_contents, RW_WRITER);
				(void) ud_itrunc(ip, 0, 0, cr);
				rw_exit(&ip->i_rwlock);
			}
			vnevent_create(ITOV(ip), ct);
		}
	}

	if (error == 0) {
		*vpp = ITOV(ip);
		ITIMES(ip);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (ip != NULL) {
		rw_exit(&ip->i_contents);
	}
#endif
	if (error) {
		goto out;
	}

	/*
	 * If vnode is a device return special vnode instead.
	 */
	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
			goto out;
		}
		*vpp = newvp;
	}
out:
	return (error);
}
834
835 /* ARGSUSED */
836 static int32_t
837 udf_remove(
838 struct vnode *vp,
839 char *nm,
840 struct cred *cr,
841 caller_context_t *ct,
842 int flags)
843 {
844 int32_t error;
845 struct ud_inode *ip = VTOI(vp);
846
847 ud_printf("udf_remove\n");
848
849 rw_enter(&ip->i_rwlock, RW_WRITER);
850 error = ud_dirremove(ip, nm,
851 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
852 rw_exit(&ip->i_rwlock);
853 ITIMES(ip);
854
855 return (error);
856 }
857
/* ARGSUSED */
/*
 * VOP_LINK for udf: create hard link `tnm' in directory `tdvp'
 * pointing at `svp'. Links to directories are refused, as are
 * links to files the caller does not own (absent privilege).
 */
static int32_t
udf_link(
	struct vnode *tdvp,
	struct vnode *svp,
	char *tnm,
	struct cred *cr,
	caller_context_t *ct,
	int flags)
{
	int32_t error;
	struct vnode *realvp;
	struct ud_inode *sip;
	struct ud_inode *tdp;

	ud_printf("udf_link\n");
	/* unwrap a specfs shadow vnode to the underlying udf vnode */
	if (VOP_REALVP(svp, &realvp, ct) == 0) {
		svp = realvp;
	}

	/*
	 * Do not allow links to directories
	 */
	if (svp->v_type == VDIR) {
		return (EPERM);
	}

	sip = VTOI(svp);

	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
		return (EPERM);

	tdp = VTOI(tdvp);

	rw_enter(&tdp->i_rwlock, RW_WRITER);
	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
	    sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
	rw_exit(&tdp->i_rwlock);
	ITIMES(sip);
	ITIMES(tdp);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	return (error);
}
905
/* ARGSUSED */
/*
 * VOP_RENAME for udf: rename `snm' in `sdvp' to `tnm' in `tdvp'.
 * The whole operation is serialized per-filesystem through
 * udf_rename_lck; the source is first linked under the target name
 * (ud_direnter/DE_RENAME) and the old entry is then removed
 * (ud_dirremove/DR_RENAME). fem pre/post rename events are fired
 * around the operation.
 */
static int32_t
udf_rename(
	struct vnode *sdvp,
	char *snm,
	struct vnode *tdvp,
	char *tnm,
	struct cred *cr,
	caller_context_t *ct,
	int flags)
{
	int32_t error = 0;
	struct udf_vfs *udf_vfsp;
	struct ud_inode *sip;		/* source inode */
	struct ud_inode *tip;		/* target inode */
	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
	struct vnode *realvp;

	ud_printf("udf_rename\n");

	/* unwrap a specfs shadow vnode to the underlying udf vnode */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
		tdvp = realvp;
	}

	sdp = VTOI(sdvp);
	tdp = VTOI(tdvp);

	udf_vfsp = sdp->i_udf;

	/* one rename at a time per filesystem */
	mutex_enter(&udf_vfsp->udf_rename_lck);
	/*
	 * Look up inode of file we're supposed to rename.
	 */
	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
		mutex_exit(&udf_vfsp->udf_rename_lck);
		return (error);
	}
	/*
	 * be sure this is not a directory with another file system mounted
	 * over it. If it is just give up the locks, and return with
	 * EBUSY
	 */
	if (vn_mountedvfs(ITOV(sip)) != NULL) {
		error = EBUSY;
		goto errout;
	}
	/*
	 * Make sure we can delete the source entry. This requires
	 * write permission on the containing directory. If that
	 * directory is "sticky" it further requires (except for
	 * privileged users) that the user own the directory or the
	 * source entry, or else have permission to write the source
	 * entry.
	 */
	rw_enter(&sdp->i_contents, RW_READER);
	rw_enter(&sip->i_contents, RW_READER);
	if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		ITIMES(sip);
		goto errout;
	}

	/*
	 * Check for renaming '.' or '..' or alias of '.'
	 */
	if ((strcmp(snm, ".") == 0) ||
	    (strcmp(snm, "..") == 0) ||
	    (sdp == sip)) {
		error = EINVAL;
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		goto errout;
	}

	rw_exit(&sip->i_contents);
	rw_exit(&sdp->i_contents);

	/* pre-rename event for an existing target, if there is one */
	if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
		vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
		VN_RELE(ITOV(tip));
	}

	/* Notify the target dir. if not the same as the source dir. */
	if (sdvp != tdvp)
		vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);

	vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);

	/*
	 * Link source to the target.
	 */
	rw_enter(&tdp->i_rwlock, RW_WRITER);
	if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
	    (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME) {
			error = 0;
		}
		rw_exit(&tdp->i_rwlock);
		goto errout;
	}
	rw_exit(&tdp->i_rwlock);

	rw_enter(&sdp->i_rwlock, RW_WRITER);
	/*
	 * Unlink the source.
	 * Remove the source entry.  ud_dirremove() checks that the entry
	 * still reflects sip, and returns an error if it doesn't.
	 * If the entry has changed just forget about it.  Release
	 * the source inode.
	 */
	if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
	    DR_RENAME, cr, ct)) == ENOENT) {
		error = 0;
	}
	rw_exit(&sdp->i_rwlock);

	if (error == 0) {
		vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
		/*
		 * vnevent_rename_dest and vnevent_rename_dest_dir are called
		 * in ud_direnter().
		 */
	}

errout:
	ITIMES(sdp);
	ITIMES(tdp);
	/* drop the hold acquired by the initial ud_dirlook() */
	VN_RELE(ITOV(sip));
	mutex_exit(&udf_vfsp->udf_rename_lck);

	return (error);
}
1045
/* ARGSUSED */
/*
 * VOP_MKDIR for udf: create directory `dirname' in `dvp' with the
 * attributes in *vap and return a held vnode for it in *vpp. The
 * actual entry creation is done by ud_direnter(DE_MKDIR) under the
 * parent's i_rwlock.
 */
static int32_t
udf_mkdir(
	struct vnode *dvp,
	char *dirname,
	struct vattr *vap,
	struct vnode **vpp,
	struct cred *cr,
	caller_context_t *ct,
	int flags,
	vsecattr_t *vsecp)
{
	int32_t error;
	struct ud_inode *ip;
	struct ud_inode *xip;

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	ud_printf("udf_mkdir\n");

	ip = VTOI(dvp);
	rw_enter(&ip->i_rwlock, RW_WRITER);
	error = ud_direnter(ip, dirname, DE_MKDIR,
	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
	rw_exit(&ip->i_rwlock);
	ITIMES(ip);
	if (error == 0) {
		ip = xip;
		*vpp = ITOV(ip);
		ITIMES(ip);
	} else if (error == EEXIST) {
		/* an entry already exists; drop the hold ud_direnter took */
		ITIMES(xip);
		VN_RELE(ITOV(xip));
	}

	return (error);
}
1083
1084 /* ARGSUSED */
1085 static int32_t
1086 udf_rmdir(
1087 struct vnode *vp,
1088 char *nm,
1089 struct vnode *cdir,
1090 struct cred *cr,
1091 caller_context_t *ct,
1092 int flags)
1093 {
1094 int32_t error;
1095 struct ud_inode *ip = VTOI(vp);
1096
1097 ud_printf("udf_rmdir\n");
1098
1099 rw_enter(&ip->i_rwlock, RW_WRITER);
1100 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1101 cr, ct);
1102 rw_exit(&ip->i_rwlock);
1103 ITIMES(ip);
1104
1105 return (error);
1106 }
1107
/* ARGSUSED */
/*
 * VOP_READDIR for udf: translate on-disk file identifier descriptors
 * (FIDs) into dirent64 records in the caller's buffer. The "."
 * entry is synthesized at uio_offset 0 with the magic next-offset
 * 0x10 (mapped back to 0 on the following call); ".." comes from
 * the FID_PARENT descriptor. Deleted FIDs are skipped. *eofp is
 * set when the directory has been fully consumed.
 */
static int32_t
udf_readdir(
	struct vnode *vp,
	struct uio *uiop,
	struct cred *cr,
	int32_t *eofp,
	caller_context_t *ct,
	int flags)
{
	struct ud_inode *ip;
	struct dirent64 *nd;
	struct udf_vfs *udf_vfsp;
	int32_t error = 0, len, outcount = 0;
	uint32_t dirsiz, offset;
	uint32_t bufsize, ndlen, dummy;
	caddr_t outbuf;
	caddr_t outb, end_outb;
	struct iovec *iovp;

	uint8_t *dname;		/* scratch buffer for uncompressed names */
	int32_t length;

	uint8_t *buf = NULL;	/* scratch block for FIDs spanning blocks */

	struct fbuf *fbp = NULL;
	struct file_id *fid;
	uint8_t *name;


	ud_printf("udf_readdir\n");

	ip = VTOI(vp);
	udf_vfsp = ip->i_udf;

	/* already past the end, or unlinked directory: report EOF */
	dirsiz = ip->i_size;
	if ((uiop->uio_offset >= dirsiz) ||
	    (ip->i_nlink <= 0)) {
		if (eofp) {
			*eofp = 1;
		}
		return (0);
	}

	offset = uiop->uio_offset;
	iovp = uiop->uio_iov;
	bufsize = iovp->iov_len;

	/* staging buffer for dirents, copied out with uiomove() at the end */
	outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
	end_outb = outb + bufsize;
	nd = (struct dirent64 *)outbuf;

	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	if (offset == 0) {
		/* synthesize "."; 0x10 marks it as already emitted */
		len = DIRENT64_RECLEN(1);
		if (((caddr_t)nd + len) >= end_outb) {
			error = EINVAL;
			goto end;
		}
		nd->d_ino = ip->i_icb_lbano;
		nd->d_reclen = (uint16_t)len;
		nd->d_off = 0x10;
		nd->d_name[0] = '.';
		bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
		nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
		outcount++;
	} else if (offset == 0x10) {
		/* "." already returned; restart real FIDs from offset 0 */
		offset = 0;
	}

	while (offset < dirsiz) {
		error = ud_get_next_fid(ip, &fbp,
		    offset, &fid, &name, buf);
		if (error != 0) {
			break;
		}

		if ((fid->fid_flags & FID_DELETED) == 0) {
			if (fid->fid_flags & FID_PARENT) {
				/* the parent FID becomes ".." */

				len = DIRENT64_RECLEN(2);
				if (((caddr_t)nd + len) >= end_outb) {
					error = EINVAL;
					break;
				}

				nd->d_ino = ip->i_icb_lbano;
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd->d_name[0] = '.';
				nd->d_name[1] = '.';
				bzero(&nd->d_name[2],
				    DIRENT64_NAMELEN(len) - 2);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			} else {
				/* decode the on-disk compressed name */
				if ((error = ud_uncompress(fid->fid_idlen,
				    &length, name, dname)) != 0) {
					break;
				}
				if (length == 0) {
					offset += FID_LEN(fid);
					continue;
				}
				len = DIRENT64_RECLEN(length);
				if (((caddr_t)nd + len) >= end_outb) {
					/*
					 * Buffer full; only an error if we
					 * could not fit even one entry.
					 */
					if (!outcount) {
						error = EINVAL;
					}
					break;
				}
				(void) strncpy(nd->d_name,
				    (caddr_t)dname, length);
				bzero(&nd->d_name[length],
				    DIRENT64_NAMELEN(len) - length);
				nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
				    SWAP_16(fid->fid_icb.lad_ext_prn),
				    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
				    &dummy);
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			}
			outcount++;
		}

		offset += FID_LEN(fid);
	}

end:
	if (fbp != NULL) {
		fbrelse(fbp, S_OTHER);
	}
	ndlen = ((char *)nd - outbuf);
	/*
	 * In case of error do not call uiomove.
	 * Return the error to the caller.
	 */
	if ((error == 0) && (ndlen != 0)) {
		error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
		uiop->uio_offset = offset;
	}
	kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
	kmem_free((caddr_t)dname, 1024);
	kmem_free(outbuf, (uint32_t)bufsize);
	if (eofp && error == 0) {
		*eofp = (uiop->uio_offset >= dirsiz);
	}
	return (error);
}
1261
1262 /* ARGSUSED */
1263 static int32_t
1264 udf_symlink(
1265 struct vnode *dvp,
1266 char *linkname,
1267 struct vattr *vap,
1268 char *target,
1269 struct cred *cr,
1270 caller_context_t *ct,
1271 int flags)
1272 {
1273 int32_t error = 0, outlen;
1274 uint32_t ioflag = 0;
1275 struct ud_inode *ip, *dip = VTOI(dvp);
1276
1277 struct path_comp *pc;
1278 int8_t *dname = NULL, *uname = NULL, *sp;
1279
1280 ud_printf("udf_symlink\n");
1281
1282 ip = (struct ud_inode *)0;
1283 vap->va_type = VLNK;
1284 vap->va_rdev = 0;
1285
1286 rw_enter(&dip->i_rwlock, RW_WRITER);
1287 error = ud_direnter(dip, linkname, DE_CREATE,
1288 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1289 rw_exit(&dip->i_rwlock);
1290 if (error == 0) {
1291 dname = kmem_zalloc(1024, KM_SLEEP);
1292 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1293
1294 pc = (struct path_comp *)uname;
1295 /*
1296 * If the first character in target is "/"
1297 * then skip it and create entry for it
1298 */
1299 if (*target == '/') {
1300 pc->pc_type = 2;
1301 pc->pc_len = 0;
1302 pc = (struct path_comp *)(((char *)pc) + 4);
1303 while (*target == '/') {
1304 target++;
1305 }
1306 }
1307
1308 while (*target != NULL) {
1309 sp = target;
1310 while ((*target != '/') && (*target != '\0')) {
1311 target ++;
1312 }
1313 /*
1314 * We got the next component of the
1315 * path name. Create path_comp of
1316 * appropriate type
1317 */
1318 if (((target - sp) == 1) && (*sp == '.')) {
1319 /*
1320 * Dot entry.
1321 */
1322 pc->pc_type = 4;
1323 pc = (struct path_comp *)(((char *)pc) + 4);
1324 } else if (((target - sp) == 2) &&
1325 (*sp == '.') && ((*(sp + 1)) == '.')) {
1326 /*
1327 * DotDot entry.
1328 */
1329 pc->pc_type = 3;
1330 pc = (struct path_comp *)(((char *)pc) + 4);
1331 } else {
1332 /*
1333 * convert the user given name
1334 * into appropriate form to be put
1335 * on the media
1336 */
1337 outlen = 1024; /* set to size of dname */
1338 if (error = ud_compress(target - sp, &outlen,
1339 (uint8_t *)sp, (uint8_t *)dname)) {
1340 break;
1341 }
1342 pc->pc_type = 5;
1343 /* LINTED */
1344 pc->pc_len = outlen;
1345 dname[outlen] = '\0';
1346 (void) strcpy((char *)pc->pc_id, dname);
1347 pc = (struct path_comp *)
1348 (((char *)pc) + 4 + outlen);
1349 }
1350 while (*target == '/') {
1351 target++;
1352 }
1353 if (*target == NULL) {
1354 break;
1355 }
1356 }
1357
1358 rw_enter(&ip->i_contents, RW_WRITER);
1359 if (error == 0) {
1360 ioflag = FWRITE;
1361 if (curthread->t_flag & T_DONTPEND) {
1362 ioflag |= FDSYNC;
1363 }
1364 error = ud_rdwri(UIO_WRITE, ioflag, ip,
1365 uname, ((int8_t *)pc) - uname,
1366 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1367 }
1368 if (error) {
1369 ud_idrop(ip);
1370 rw_exit(&ip->i_contents);
1371 rw_enter(&dip->i_rwlock, RW_WRITER);
1372 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1373 (struct vnode *)0, DR_REMOVE, cr, ct);
1374 rw_exit(&dip->i_rwlock);
1375 goto update_inode;
1376 }
1377 rw_exit(&ip->i_contents);
1378 }
1379
1380 if ((error == 0) || (error == EEXIST)) {
1381 VN_RELE(ITOV(ip));
1382 }
1383
1384 update_inode:
1385 ITIMES(VTOI(dvp));
1386 if (uname != NULL) {
1387 kmem_free(uname, PAGESIZE);
1388 }
1389 if (dname != NULL) {
1390 kmem_free(dname, 1024);
1391 }
1392
1393 return (error);
1394 }
1395
1396 /* ARGSUSED */
1397 static int32_t
1398 udf_readlink(
1399 struct vnode *vp,
1400 struct uio *uiop,
1401 struct cred *cr,
1402 caller_context_t *ct)
1403 {
1404 int32_t error = 0, off, id_len, size, len;
1405 int8_t *dname = NULL, *uname = NULL;
1406 struct ud_inode *ip;
1407 struct fbuf *fbp = NULL;
1408 struct path_comp *pc;
1409
1410 ud_printf("udf_readlink\n");
1411
1412 if (vp->v_type != VLNK) {
1413 return (EINVAL);
1414 }
1415
1416 ip = VTOI(vp);
1417 size = ip->i_size;
1418 if (size > PAGESIZE) {
1419 return (EIO);
1420 }
1421
1422 if (size == 0) {
1423 return (0);
1424 }
1425
1426 dname = kmem_zalloc(1024, KM_SLEEP);
1427 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1428
1429 rw_enter(&ip->i_contents, RW_READER);
1430
1431 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1432 goto end;
1433 }
1434
1435 off = 0;
1436
1437 while (off < size) {
1438 pc = (struct path_comp *)(fbp->fb_addr + off);
1439 switch (pc->pc_type) {
1440 case 1 :
1441 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1442 (void) strcat(uname, "/");
1443 break;
1444 case 2 :
1445 if (pc->pc_len != 0) {
1446 goto end;
1447 }
1448 uname[0] = '/';
1449 uname[1] = '\0';
1450 break;
1451 case 3 :
1452 (void) strcat(uname, "../");
1453 break;
1454 case 4 :
1455 (void) strcat(uname, "./");
1456 break;
1457 case 5 :
1458 if ((error = ud_uncompress(pc->pc_len, &id_len,
1459 pc->pc_id, (uint8_t *)dname)) != 0) {
1460 break;
1461 }
1462 dname[id_len] = '\0';
1463 (void) strcat(uname, dname);
1464 (void) strcat(uname, "/");
1465 break;
1466 default :
1467 error = EINVAL;
1468 goto end;
1469 }
1470 off += 4 + pc->pc_len;
1471 }
1472 len = strlen(uname) - 1;
1473 if (uname[len] == '/') {
1474 if (len == 0) {
1475 /*
1476 * special case link to /
1477 */
1478 len = 1;
1479 } else {
1480 uname[len] = '\0';
1481 }
1482 }
1483
1484 error = uiomove(uname, len, UIO_READ, uiop);
1485
1486 ITIMES(ip);
1487
1488 end:
1489 if (fbp != NULL) {
1490 fbrelse(fbp, S_OTHER);
1491 }
1492 rw_exit(&ip->i_contents);
1493 if (uname != NULL) {
1494 kmem_free(uname, PAGESIZE);
1495 }
1496 if (dname != NULL) {
1497 kmem_free(dname, 1024);
1498 }
1499 return (error);
1500 }
1501
1502 /* ARGSUSED */
1503 static int32_t
1504 udf_fsync(
1505 struct vnode *vp,
1506 int32_t syncflag,
1507 struct cred *cr,
1508 caller_context_t *ct)
1509 {
1510 int32_t error = 0;
1511 struct ud_inode *ip = VTOI(vp);
1512
1513 ud_printf("udf_fsync\n");
1514
1515 rw_enter(&ip->i_contents, RW_WRITER);
1516 if (!(IS_SWAPVP(vp))) {
1517 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1518 }
1519 if (error == 0) {
1520 error = ud_sync_indir(ip);
1521 }
1522 ITIMES(ip); /* XXX: is this necessary ??? */
1523 rw_exit(&ip->i_contents);
1524
1525 return (error);
1526 }
1527
/*
 * udf_inactive - called when the last reference to the vnode goes
 * away; all the real work is delegated to ud_iinactive().
 */
/* ARGSUSED */
static void
udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
{
	ud_printf("udf_iinactive\n");

	ud_iinactive(VTOI(vp), cr);
}
1536
1537 /* ARGSUSED */
1538 static int32_t
1539 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1540 {
1541 struct udf_fid *udfidp;
1542 struct ud_inode *ip = VTOI(vp);
1543
1544 ud_printf("udf_fid\n");
1545
1546 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1547 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1548 return (ENOSPC);
1549 }
1550
1551 udfidp = (struct udf_fid *)fidp;
1552 bzero((char *)udfidp, sizeof (struct udf_fid));
1553 rw_enter(&ip->i_contents, RW_READER);
1554 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1555 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1556 udfidp->udfid_prn = ip->i_icb_prn;
1557 udfidp->udfid_icb_lbn = ip->i_icb_block;
1558 rw_exit(&ip->i_contents);
1559
1560 return (0);
1561 }
1562
1563 /* ARGSUSED2 */
1564 static int
1565 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1566 {
1567 struct ud_inode *ip = VTOI(vp);
1568
1569 ud_printf("udf_rwlock\n");
1570
1571 if (write_lock) {
1572 rw_enter(&ip->i_rwlock, RW_WRITER);
1573 } else {
1574 rw_enter(&ip->i_rwlock, RW_READER);
1575 }
1576 #ifdef __lock_lint
1577 rw_exit(&ip->i_rwlock);
1578 #endif
1579 return (write_lock);
1580 }
1581
/*
 * udf_rwunlock - release the i_rwlock taken by udf_rwlock().
 */
/* ARGSUSED */
static void
udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_rwunlock\n");

#ifdef __lock_lint
	rw_enter(&ip->i_rwlock, RW_WRITER);
#endif

	rw_exit(&ip->i_rwlock);

}
1597
1598 /* ARGSUSED */
1599 static int32_t
1600 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1601 {
1602 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1603 }
1604
1605 static int32_t
1606 udf_frlock(
1607 struct vnode *vp,
1608 int32_t cmd,
1609 struct flock64 *bfp,
1610 int32_t flag,
1611 offset_t offset,
1612 struct flk_callback *flk_cbp,
1613 cred_t *cr,
1614 caller_context_t *ct)
1615 {
1616 struct ud_inode *ip = VTOI(vp);
1617
1618 ud_printf("udf_frlock\n");
1619
1620 /*
1621 * If file is being mapped, disallow frlock.
1622 * XXX I am not holding tlock while checking i_mapcnt because the
1623 * current locking strategy drops all locks before calling fs_frlock.
1624 * So, mapcnt could change before we enter fs_frlock making is
1625 * meaningless to have held tlock in the first place.
1626 */
1627 if ((ip->i_mapcnt > 0) &&
1628 (MANDLOCK(vp, ip->i_char))) {
1629 return (EAGAIN);
1630 }
1631
1632 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1633 }
1634
1635 /*ARGSUSED6*/
1636 static int32_t
1637 udf_space(
1638 struct vnode *vp,
1639 int32_t cmd,
1640 struct flock64 *bfp,
1641 int32_t flag,
1642 offset_t offset,
1643 cred_t *cr,
1644 caller_context_t *ct)
1645 {
1646 int32_t error = 0;
1647
1648 ud_printf("udf_space\n");
1649
1650 if (cmd != F_FREESP) {
1651 error = EINVAL;
1652 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1653 error = ud_freesp(vp, bfp, flag, cr);
1654
1655 if (error == 0) {
1656 if (bfp->l_start == 0) {
1657 vnevent_truncate(vp, ct);
1658 } else {
1659 vnevent_resize(vp, ct);
1660 }
1661 }
1662 }
1663
1664 return (error);
1665 }
1666
/*
 * udf_getpage - VOP_GETPAGE for UDF: return (locked) pages covering
 * <off, off+len) in plarr, creating or reading them as needed.
 *
 * Locking: i_contents is taken here unless the caller already owns it
 * (dolock).  A read lock normally suffices; if blocks must be
 * allocated (write faults into holes or beyond EOF) the lock is
 * upgraded to a writer via rw_tryupgrade(), retrying from scratch if
 * the upgrade fails.  plarr == NULL means async faultahead only.
 */
/* ARGSUSED */
static int32_t
udf_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint32_t *protp,
	struct page **plarr,
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off; /* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	/* Sequential access if this fault continues the last read. */
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
#ifdef __lock_lint
	rw_enter(&ip->i_contents, rwtype);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}
#endif

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed. In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here. We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
#ifdef __lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t	offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			if (!rw_tryupgrade(&ip->i_contents)) {

				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * Allocate backing store one logical block at a
			 * time, clamping the final block to i_size.
			 */

			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				goto update_inode;
			}
			offset += count; /* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
#ifdef __lock_lint
	if (rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#else
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#endif

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			/* Skip past the pages the miss handler filled in. */
			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t *pp;

			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL; /* Terminate page list */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
#ifdef __lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}
1949
1950 int32_t ud_delay = 1;
1951
/*
 * udf_putpage - VOP_PUTPAGE for UDF: write out dirty pages in
 * <off, off+len).  Plain B_ASYNC requests are accumulated into a
 * per-inode delayed-write cluster (i_delayoff/i_delaylen) and pushed
 * only when the cluster is full or a non-contiguous request arrives;
 * everything else is handed straight to ud_putpages().
 */
/* ARGSUSED */
static int32_t
udf_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip;
	int32_t error = 0;

	ud_printf("udf_putpage\n");

	ip = VTOI(vp);
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#endif

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpage : bad v_count");
		error = EINVAL;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	if (flags & B_ASYNC) {
		/* Only cluster plain async pushes with a known length. */
		if (ud_delay && len &&
		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
			mutex_enter(&ip->i_tlock);

			/*
			 * If nobody stalled, start a new cluster.
			 */
			if (ip->i_delaylen == 0) {
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				goto out;
			}

			/*
			 * If we have a full cluster or they are not contig,
			 * then push last cluster and start over.
			 */
			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
			    ip->i_delayoff + ip->i_delaylen != off) {
				u_offset_t doff;
				size_t dlen;

				doff = ip->i_delayoff;
				dlen = ip->i_delaylen;
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				error = ud_putpages(vp, doff, dlen, flags, cr);
				/* LMXXX - flags are new val, not old */
				goto out;
			}

			/*
			 * There is something there, it's not full, and
			 * it is contig.
			 */
			ip->i_delaylen += len;
			mutex_exit(&ip->i_tlock);
			goto out;
		}

		/*
		 * Must have weird flags or we are not clustering.
		 */
	}

	error = ud_putpages(vp, off, len, flags, cr);

out:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#endif
	return (error);
}
2039
2040 /* ARGSUSED */
2041 static int32_t
2042 udf_map(
2043 struct vnode *vp,
2044 offset_t off,
2045 struct as *as,
2046 caddr_t *addrp,
2047 size_t len,
2048 uint8_t prot,
2049 uint8_t maxprot,
2050 uint32_t flags,
2051 struct cred *cr,
2052 caller_context_t *ct)
2053 {
2054 struct segvn_crargs vn_a;
2055 int32_t error = 0;
2056
2057 ud_printf("udf_map\n");
2058
2059 if (vp->v_flag & VNOMAP) {
2060 error = ENOSYS;
2061 goto end;
2062 }
2063
2064 if ((off < (offset_t)0) ||
2065 ((off + len) < (offset_t)0)) {
2066 error = EINVAL;
2067 goto end;
2068 }
2069
2070 if (vp->v_type != VREG) {
2071 error = ENODEV;
2072 goto end;
2073 }
2074
2075 /*
2076 * If file is being locked, disallow mapping.
2077 */
2078 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2079 error = EAGAIN;
2080 goto end;
2081 }
2082
2083 as_rangelock(as);
2084 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2085 if (error != 0) {
2086 as_rangeunlock(as);
2087 goto end;
2088 }
2089
2090 vn_a.vp = vp;
2091 vn_a.offset = off;
2092 vn_a.type = flags & MAP_TYPE;
2093 vn_a.prot = prot;
2094 vn_a.maxprot = maxprot;
2095 vn_a.cred = cr;
2096 vn_a.amp = NULL;
2097 vn_a.flags = flags & ~MAP_TYPE;
2098 vn_a.szc = 0;
2099 vn_a.lgrp_mem_policy_flags = 0;
2100
2101 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2102 as_rangeunlock(as);
2103
2104 end:
2105 return (error);
2106 }
2107
2108 /* ARGSUSED */
2109 static int32_t
2110 udf_addmap(struct vnode *vp,
2111 offset_t off,
2112 struct as *as,
2113 caddr_t addr,
2114 size_t len,
2115 uint8_t prot,
2116 uint8_t maxprot,
2117 uint32_t flags,
2118 struct cred *cr,
2119 caller_context_t *ct)
2120 {
2121 struct ud_inode *ip = VTOI(vp);
2122
2123 ud_printf("udf_addmap\n");
2124
2125 if (vp->v_flag & VNOMAP) {
2126 return (ENOSYS);
2127 }
2128
2129 mutex_enter(&ip->i_tlock);
2130 ip->i_mapcnt += btopr(len);
2131 mutex_exit(&ip->i_tlock);
2132
2133 return (0);
2134 }
2135
2136 /* ARGSUSED */
2137 static int32_t
2138 udf_delmap(
2139 struct vnode *vp, offset_t off,
2140 struct as *as,
2141 caddr_t addr,
2142 size_t len,
2143 uint32_t prot,
2144 uint32_t maxprot,
2145 uint32_t flags,
2146 struct cred *cr,
2147 caller_context_t *ct)
2148 {
2149 struct ud_inode *ip = VTOI(vp);
2150
2151 ud_printf("udf_delmap\n");
2152
2153 if (vp->v_flag & VNOMAP) {
2154 return (ENOSYS);
2155 }
2156
2157 mutex_enter(&ip->i_tlock);
2158 ip->i_mapcnt -= btopr(len); /* Count released mappings */
2159 ASSERT(ip->i_mapcnt >= 0);
2160 mutex_exit(&ip->i_tlock);
2161
2162 return (0);
2163 }
2164
2165 /* ARGSUSED */
2166 static int32_t
2167 udf_l_pathconf(
2168 struct vnode *vp,
2169 int32_t cmd,
2170 ulong_t *valp,
2171 struct cred *cr,
2172 caller_context_t *ct)
2173 {
2174 int32_t error = 0;
2175
2176 ud_printf("udf_l_pathconf\n");
2177
2178 if (cmd == _PC_FILESIZEBITS) {
2179 /*
2180 * udf supports 64 bits as file size
2181 * but there are several other restrictions
2182 * it only supports 32-bit block numbers and
2183 * daddr32_t is only and int32_t so taking these
2184 * into account we can stay just as where ufs is
2185 */
2186 *valp = 41;
2187 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2188 /* nanosecond timestamp resolution */
2189 *valp = 1L;
2190 } else {
2191 error = fs_pathconf(vp, cmd, valp, cr, ct);
2192 }
2193
2194 return (error);
2195 }
2196
2197 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2198 #ifndef __lint
2199 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2200 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2201 #endif
2202 /*
2203 * Assumption is that there will not be a pageio request
2204 * to a enbedded file
2205 */
2206 /* ARGSUSED */
2207 static int32_t
2208 udf_pageio(
2209 struct vnode *vp,
2210 struct page *pp,
2211 u_offset_t io_off,
2212 size_t io_len,
2213 int32_t flags,
2214 struct cred *cr,
2215 caller_context_t *ct)
2216 {
2217 daddr_t bn;
2218 struct buf *bp;
2219 struct ud_inode *ip = VTOI(vp);
2220 int32_t dolock, error = 0, contig, multi_io;
2221 size_t done_len = 0, cur_len = 0;
2222 page_t *npp = NULL, *opp = NULL, *cpp = pp;
2223
2224 if (pp == NULL) {
2225 return (EINVAL);
2226 }
2227
2228 dolock = (rw_owner(&ip->i_contents) != curthread);
2229
2230 /*
2231 * We need a better check. Ideally, we would use another
2232 * vnodeops so that hlocked and forcibly unmounted file
2233 * systems would return EIO where appropriate and w/o the
2234 * need for these checks.
2235 */
2236 if (ip->i_udf == NULL) {
2237 return (EIO);
2238 }
2239
2240 #ifdef __lock_lint
2241 rw_enter(&ip->i_contents, RW_READER);
2242 #else
2243 if (dolock) {
2244 rw_enter(&ip->i_contents, RW_READER);
2245 }
2246 #endif
2247
2248 /*
2249 * Break the io request into chunks, one for each contiguous
2250 * stretch of disk blocks in the target file.
2251 */
2252 while (done_len < io_len) {
2253 ASSERT(cpp);
2254 bp = NULL;
2255 contig = 0;
2256 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2257 &bn, &contig)) {
2258 break;
2259 }
2260
2261 if (bn == UDF_HOLE) { /* No holey swapfiles */
2262 cmn_err(CE_WARN, "SWAP file has HOLES");
2263 error = EINVAL;
2264 break;
2265 }
2266
2267 cur_len = MIN(io_len - done_len, contig);
2268
2269 /*
2270 * Check if more than one I/O is
2271 * required to complete the given
2272 * I/O operation
2273 */
2274 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2275 if (cur_len >= PAGESIZE) {
2276 multi_io = 0;
2277 cur_len &= PAGEMASK;
2278 } else {
2279 multi_io = 1;
2280 cur_len = MIN(io_len - done_len, PAGESIZE);
2281 }
2282 }
2283 page_list_break(&cpp, &npp, btop(cur_len));
2284
2285 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2286 ASSERT(bp != NULL);
2287
2288 bp->b_edev = ip->i_dev;
2289 bp->b_dev = cmpdev(ip->i_dev);
2290 bp->b_blkno = bn;
2291 bp->b_un.b_addr = (caddr_t)0;
2292 bp->b_file = vp;
2293 bp->b_offset = (offset_t)(io_off + done_len);
2294
2295 /*
2296 * ub.ub_pageios.value.ul++;
2297 */
2298 if (multi_io == 0) {
2299 (void) bdev_strategy(bp);
2300 } else {
2301 error = ud_multi_strat(ip, cpp, bp,
2302 (u_offset_t)(io_off + done_len));
2303 if (error != 0) {
2304 pageio_done(bp);
2305 break;
2306 }
2307 }
2308 if (flags & B_READ) {
2309 ud_pageio_reads++;
2310 } else {
2311 ud_pageio_writes++;
2312 }
2313
2314 /*
2315 * If the request is not B_ASYNC, wait for i/o to complete
2316 * and re-assemble the page list to return to the caller.
2317 * If it is B_ASYNC we leave the page list in pieces and
2318 * cleanup() will dispose of them.
2319 */
2320 if ((flags & B_ASYNC) == 0) {
2321 error = biowait(bp);
2322 pageio_done(bp);
2323 if (error) {
2324 break;
2325 }
2326 page_list_concat(&opp, &cpp);
2327 }
2328 cpp = npp;
2329 npp = NULL;
2330 done_len += cur_len;
2331 }
2332
2333 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2334 if (error) {
2335 if (flags & B_ASYNC) {
2336 /* Cleanup unprocessed parts of list */
2337 page_list_concat(&cpp, &npp);
2338 if (flags & B_READ) {
2339 pvn_read_done(cpp, B_ERROR);
2340 } else {
2341 pvn_write_done(cpp, B_ERROR);
2342 }
2343 } else {
2344 /* Re-assemble list and let caller clean up */
2345 page_list_concat(&opp, &cpp);
2346 page_list_concat(&opp, &npp);
2347 }
2348 }
2349
2350 #ifdef __lock_lint
2351 rw_exit(&ip->i_contents);
2352 #else
2353 if (dolock) {
2354 rw_exit(&ip->i_contents);
2355 }
2356 #endif
2357 return (error);
2358 }
2359
2360
2361
2362
2363 /* -------------------- local functions --------------------------- */
2364
2365
2366
2367 int32_t
2368 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2369 struct ud_inode *ip, caddr_t base, int32_t len,
2370 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2371 {
2372 int32_t error;
2373 struct uio auio;
2374 struct iovec aiov;
2375
2376 ud_printf("ud_rdwri\n");
2377
2378 bzero((caddr_t)&auio, sizeof (uio_t));
2379 bzero((caddr_t)&aiov, sizeof (iovec_t));
2380
2381 aiov.iov_base = base;
2382 aiov.iov_len = len;
2383 auio.uio_iov = &aiov;
2384 auio.uio_iovcnt = 1;
2385 auio.uio_loffset = offset;
2386 auio.uio_segflg = (int16_t)seg;
2387 auio.uio_resid = len;
2388
2389 if (rw == UIO_WRITE) {
2390 auio.uio_fmode = FWRITE;
2391 auio.uio_extflg = UIO_COPY_DEFAULT;
2392 auio.uio_llimit = curproc->p_fsz_ctl;
2393 error = ud_wrip(ip, &auio, ioflag, cr);
2394 } else {
2395 auio.uio_fmode = FREAD;
2396 auio.uio_extflg = UIO_COPY_CACHED;
2397 auio.uio_llimit = MAXOFFSET_T;
2398 error = ud_rdip(ip, &auio, ioflag, cr);
2399 }
2400
2401 if (aresid) {
2402 *aresid = auio.uio_resid;
2403 } else if (auio.uio_resid) {
2404 error = EIO;
2405 }
2406 return (error);
2407 }
2408
2409 /*
2410 * Free behind hacks. The pager is busted.
2411 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
2412 * or B_FREE_IF_TIGHT_ON_MEMORY.
2413 */
2414 int32_t ud_freebehind = 1;
2415 int32_t ud_smallfile = 32 * 1024;
2416
2417 /* ARGSUSED */
2418 int32_t
2419 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2420 size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2421 size_t plsz, enum seg_rw rw, int32_t seq)
2422 {
2423 struct ud_inode *ip = VTOI(vp);
2424 int32_t err = 0;
2425 size_t io_len;
2426 u_offset_t io_off;
2427 u_offset_t pgoff;
2428 page_t *pp;
2429
2430 pl[0] = NULL;
2431
2432 /*
2433 * Figure out whether the page can be created, or must be
2434 * read from the disk
2435 */
2436 if (rw == S_CREATE) {
2437 if ((pp = page_create_va(vp, off,
2438 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2439 cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2440 return (EINVAL);
2441 }
2442 io_len = PAGESIZE;
2443 } else {
2444 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2445 &io_len, off, PAGESIZE, 0);
2446
2447 /*
2448 * Some other thread has entered the page.
2449 * ud_getpage will retry page_lookup.
2450 */
2451 if (pp == NULL) {
2452 return (0);
2453 }
2454
2455 /*
2456 * Fill the page with as much data as we can from the file.
2457 */
2458 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2459 if (err) {
2460 pvn_read_done(pp, B_ERROR);
2461 return (err);
2462 }
2463
2464 /*
2465 * XXX ??? ufs has io_len instead of pgoff below
2466 */
2467 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2468
2469 /*
2470 * If the file access is sequential, initiate read ahead
2471 * of the next cluster.
2472 */
2473 if (seq && ip->i_nextrio < ip->i_size) {
2474 ud_getpage_ra(vp, off, seg, addr);
2475 }
2476 }
2477
2478 outmiss:
2479 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2480 return (err);
2481 }
2482
/*
 * ud_getpage_ra - start asynchronous read-ahead of the cluster at
 * ip->i_nextrio.  Best-effort: silently returns if the target lies
 * outside the segment, if the block is unmapped/a hole, or if the
 * page already exists.  On success, advances i_nextrio past the pages
 * whose I/O was initiated.
 */
/* ARGSUSED */
void
ud_getpage_ra(struct vnode *vp,
	u_offset_t off, struct seg *seg, caddr_t addr)
{
	page_t *pp;
	size_t io_len;
	struct ud_inode *ip = VTOI(vp);
	u_offset_t io_off = ip->i_nextrio, pgoff;
	caddr_t addr2 = addr + (io_off - off);
	daddr_t bn;
	int32_t contig = 0;

	/*
	 * Don't read ahead past the end of the segment.
	 */
	if (addr2 >= seg->s_base + seg->s_size) {
		return;
	}

	contig = 0;
	/* No read-ahead into unmapped blocks or holes. */
	if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
		return;
	}

	pp = pvn_read_kluster(vp, io_off, seg, addr2,
	    &io_off, &io_len, io_off, PAGESIZE, 1);

	/*
	 * Some other thread has entered the page, so no read-ahead is
	 * done here (the eventual reader will wait for the I/O then).
	 */
	if (pp == NULL) {
		return;
	}

	(void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
	ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
}
2525
/*
 * Fill the page list "pp" with file data starting at offset "off".
 *
 * For embedded files (ICB_FLAG_ONE_AD) the data lives inside the
 * file_entry block, so it is read through the buffer cache and copied
 * into the page via a transient kernel mapping.  Otherwise the backing
 * disk blocks are looked up with ud_bmap_read() and a pageio buf is
 * issued - as a single bdev_strategy() call when the extent is
 * contiguous, or through ud_multi_strat() when the page spans multiple
 * extents.
 *
 * bflgs is B_READ optionally or'ed with B_ASYNC; when B_ASYNC is clear
 * the I/O is waited for here and the buf is torn down.  On return
 * *pg_off holds the number of bytes of the page actually covered
 * (clipped to i_size).  Returns 0 or an errno.
 */
int
ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
	uint32_t bflgs, u_offset_t *pg_off)
{
	daddr_t bn;
	struct buf *bp;
	caddr_t kaddr, caddr;
	int32_t error = 0, contig = 0, multi_io = 0;
	int32_t lbsize = ip->i_udf->udf_lbsize;
	int32_t lbmask = ip->i_udf->udf_lbmask;
	uint64_t isize;

	/* i_size rounded up to a logical-block boundary */
	isize = (ip->i_size + lbmask) & (~lbmask);
	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {

		/*
		 * Embedded file read file_entry
		 * from buffer cache and copy the required
		 * portions
		 */
		bp = ud_bread(ip->i_dev,
			ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
		if ((bp->b_error == 0) &&
			(bp->b_resid == 0)) {

			/* data starts i_data_off bytes into the block */
			caddr = bp->b_un.b_addr + ip->i_data_off;

			/*
			 * mapin to kvm
			 */
			kaddr = (caddr_t)ppmapin(pp,
				PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(caddr, kaddr, ip->i_size);

			/*
			 * mapout of kvm
			 */
			ppmapout(kaddr);
		}
		brelse(bp);
		contig = ip->i_size;
	} else {

		/*
		 * Get the continuous size and block number
		 * at offset "off"
		 */
		if (error = ud_bmap_read(ip, off, &bn, &contig))
			goto out;
		contig = MIN(contig, PAGESIZE);
		/* round the transfer up to a logical-block multiple */
		contig = (contig + lbmask) & (~lbmask);

		/*
		 * Zero part of the page which we are not
		 * going to read from the disk.
		 */

		if (bn == UDF_HOLE) {

			/*
			 * This is a HOLE. Just zero out
			 * the page
			 * NOTE(review): zeroes via pp->p_prev (the tail of
			 * the kluster list) - confirm that is the intended
			 * page of the list.
			 */
			if (((off + contig) == isize) ||
				(contig == PAGESIZE)) {
				pagezero(pp->p_prev, 0, PAGESIZE);
				goto out;
			}
		}

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				/*
				 * Page spans more than one extent; the
				 * I/O must be done in pieces.
				 */
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			} else {
				/* short read at EOF: zero the tail */
				pagezero(pp->p_prev, contig, PAGESIZE - contig);
			}
		}

		/*
		 * Get a bp and initialize it
		 */
		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = ip->i_vnode;

		/*
		 * Start I/O
		 */
		if (multi_io == 0) {

			/*
			 * Single I/O is sufficient for this page
			 */
			(void) bdev_strategy(bp);
		} else {

			/*
			 * We need to do the I/O in
			 * piece's
			 */
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}
		if ((bflgs & B_ASYNC) == 0) {

			/*
			 * Wait for i/o to complete.
			 */

			error = biowait(bp);
			pageio_done(bp);
			if (error) {
				goto out;
			}
		}
	}
	/* clip the reported fill length to the true end of file */
	if ((off + contig) >= ip->i_size) {
		contig = ip->i_size - off;
	}

out:
	*pg_off = contig;
	return (error);
}
2661
/*
 * Flush dirty pages of the vnode in the range [off, off + len).
 * A len of 0 means "from off to the end of the file": the whole
 * vnode page list is walked with pvn_vplist_dirty().  Otherwise the
 * range is walked page by page and each dirty page (plus whatever
 * ud_putapage klusters with it) is pushed with ud_putapage().
 * "flags" are the B_* pageout flags (B_INVAL, B_FREE, B_ASYNC, ...).
 * The inode contents lock is held across the I/O unless the caller
 * already owns it.  Returns 0 or an errno from the page push.
 */
int32_t
ud_putpages(struct vnode *vp, offset_t off,
	size_t len, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len;
	u_offset_t eoff;
	int32_t err = 0;
	int32_t dolock;

	ud_printf("ud_putpages\n");

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpages: bad v_count");
		return (EINVAL);
	}

	ip = VTOI(vp);

	/*
	 * Acquire the readers/write inode lock before locking
	 * any pages in this inode.
	 * The inode lock is held during i/o.
	 */
	if (len == 0) {
		/* whole-file flush: clear any recorded delayed-write range */
		mutex_enter(&ip->i_tlock);
		ip->i_delayoff = ip->i_delaylen = 0;
		mutex_exit(&ip->i_tlock);
	}
#ifdef __lock_lint
	rw_enter(&ip->i_contents, RW_READER);
#else
	/* don't recurse if this thread already holds i_contents */
	dolock = (rw_owner(&ip->i_contents) != curthread);
	if (dolock) {
		rw_enter(&ip->i_contents, RW_READER);
	}
#endif

	if (!vn_has_cached_data(vp)) {
#ifdef __lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
			flags, cr);
	} else {
		/*
		 * Loop over all offsets in the range looking for
		 * pages to deal with.
		 */
		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
			eoff = MIN(off + len, eoff);
		} else {
			eoff = off + len;
		}

		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages, use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
					(flags & (B_INVAL | B_FREE)) ?
					    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
				/* page absent or clean: step one page */
				io_len = PAGESIZE;
			} else {

				err = ud_putapage(vp, pp,
					&io_off, &io_len, flags, cr);
				if (err != 0) {
					break;
				}
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
		/*
		 * We have just sync'ed back all the pages on
		 * the inode, turn off the IMODTIME flag.
		 */
		mutex_enter(&ip->i_tlock);
		ip->i_flag &= ~IMODTIME;
		mutex_exit(&ip->i_tlock);
	}
#ifdef __lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif
	return (err);
}
2783
/*
 * Write out a single dirty page (klustered with neighbouring dirty
 * pages when possible).  Used as the putapage callback for
 * pvn_vplist_dirty() and called directly from ud_putpages().
 *
 * For embedded files (ICB_FLAG_ONE_AD) the page contents are copied
 * back into the file_entry block and the block is written with
 * bwrite().  Otherwise a pageio buf is built and pushed via
 * bdev_strategy() or, for pages spanning several extents,
 * ud_multi_strat(); write throttling is accounted through i_writes /
 * ud_iodone.  On success *offp/*lenp report the range actually
 * written.  Returns 0 or an errno; on error the pages are released
 * with B_ERROR.
 */
/* ARGSUSED */
int32_t
ud_putapage(struct vnode *vp,
	page_t *pp, u_offset_t *offp,
	size_t *lenp, int32_t flags, struct cred *cr)
{
	daddr_t bn;
	size_t io_len;
	struct ud_inode *ip;
	int32_t error = 0, contig, multi_io = 0;
	struct udf_vfs *udf_vfsp;
	u_offset_t off, io_off;
	caddr_t kaddr, caddr;
	struct buf *bp = NULL;
	int32_t lbmask;
	uint64_t isize;
	uint16_t crc_len;
	struct file_entry *fe;

	ud_printf("ud_putapage\n");

	ip = VTOI(vp);
	ASSERT(ip);
	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbmask = ip->i_udf->udf_lbmask;
	/* i_size rounded up to a logical-block boundary */
	isize = (ip->i_size + lbmask) & (~lbmask);

	udf_vfsp = ip->i_udf;
	ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 */
	if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= IUPD;
		ITIMES_NOLOCK(ip);
		mutex_exit(&ip->i_tlock);
	}


	/*
	 * Align the request to a block boundary (for old file systems),
	 * and go ask bmap() how contiguous things are for this file.
	 */
	off = pp->p_offset & ~(offset_t)lbmask;
	/* block align it */


	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
		/* embedded data must fit inside the file_entry */
		ASSERT(ip->i_size <= ip->i_max_emb);

		pp = pvn_write_kluster(vp, pp, &io_off,
			&io_len, off, PAGESIZE, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		/* read the file_entry block that holds the data */
		bp = ud_bread(ip->i_dev,
			ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
			udf_vfsp->udf_lbsize);
		fe = (struct file_entry *)bp->b_un.b_addr;
		if ((bp->b_flags & B_ERROR) ||
		    (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
		    ip->i_icb_block,
		    1, udf_vfsp->udf_lbsize) != 0)) {
			/* bad read or corrupt descriptor: fail the pages */
			if (pp != NULL)
				pvn_write_done(pp, B_ERROR | B_WRITE | flags);
			if (bp->b_flags & B_ERROR) {
				error = EIO;
			} else {
				error = EINVAL;
			}
			brelse(bp);
			return (error);
		}
		if ((bp->b_error == 0) &&
			(bp->b_resid == 0)) {

			/* copy the page back into the descriptor block */
			caddr = bp->b_un.b_addr + ip->i_data_off;
			kaddr = (caddr_t)ppmapin(pp,
				PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(kaddr, caddr, ip->i_size);
			ppmapout(kaddr);
		}
		/* recompute the descriptor tag over header + data */
		crc_len = offsetof(struct file_entry, fe_spec) +
			SWAP_32(fe->fe_len_ear);
		crc_len += ip->i_size;
		ud_make_tag(ip->i_udf, &fe->fe_tag,
			UD_FILE_ENTRY, ip->i_icb_block, crc_len);

		bwrite(bp);

		if (flags & B_ASYNC) {
			pvn_write_done(pp, flags);
		}
		contig = ip->i_size;
	} else {

		if (error = ud_bmap_read(ip, off, &bn, &contig)) {
			goto out;
		}
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				/* page spans multiple extents */
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			}
		}

		if ((off + contig) > isize) {
			contig = isize - off;
		}

		if (contig > PAGESIZE) {
			if (contig & PAGEOFFSET) {
				contig &= PAGEMASK;
			}
		}

		pp = pvn_write_kluster(vp, pp, &io_off,
			&io_len, off, contig, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = vp;
		bp->b_offset = (offset_t)off;


		/*
		 * write throttle
		 * ud_iodone will credit i_writes back on completion.
		 */
		ASSERT(bp->b_iodone == NULL);
		bp->b_iodone = ud_iodone;
		mutex_enter(&ip->i_tlock);
		ip->i_writes += bp->b_bcount;
		mutex_exit(&ip->i_tlock);

		if (multi_io == 0) {

			(void) bdev_strategy(bp);
		} else {
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}

		if ((flags & B_ASYNC) == 0) {
			/*
			 * Wait for i/o to complete.
			 */
			error = biowait(bp);
			pageio_done(bp);
		}
	}

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	}

	pp = NULL;

out:
	if (error != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp) {
		*offp = io_off;
	}
	if (lenp) {
		*lenp = io_len;
	}

	return (error);
}
2977
2978
2979 int32_t
2980 ud_iodone(struct buf *bp)
2981 {
2982 struct ud_inode *ip;
2983
2984 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2985
2986 bp->b_iodone = NULL;
2987
2988 ip = VTOI(bp->b_pages->p_vnode);
2989
2990 mutex_enter(&ip->i_tlock);
2991 if (ip->i_writes >= ud_LW) {
2992 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2993 if (ud_WRITES) {
2994 cv_broadcast(&ip->i_wrcv); /* wake all up */
2995 }
2996 }
2997 } else {
2998 ip->i_writes -= bp->b_bcount;
2999 }
3000 mutex_exit(&ip->i_tlock);
3001 iodone(bp);
3002 return (0);
3003 }
3004
/*
 * Read data from the inode into the uio, one logical block at a time,
 * through the segkmap window (segmap_getmapflt/uiomove).  Caller holds
 * i_contents (reader or writer); when held as reader it is dropped
 * around the segmap fault to avoid deadlocking against pagefaults on
 * the same file.  Honours FRSYNC/FSYNC/FDSYNC by pushing pages and
 * updating the inode as described in the table at "out:".  Returns 0
 * or an errno; a partial read that moved any data returns 0.
 */
/* ARGSUSED3 */
int32_t
ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
{
	struct vnode *vp;
	struct udf_vfs *udf_vfsp;
	krw_t rwtype;
	caddr_t base;
	uint32_t flags;
	int32_t error, n, on, mapon, dofree;
	u_offset_t off;
	long oresid = uio->uio_resid;

	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	if ((ip->i_type != VREG) &&
		(ip->i_type != VDIR) &&
		(ip->i_type != VLNK)) {
		return (EIO);
	}

	/* beyond the maximum representable offset: EOF, not an error */
	if (uio->uio_loffset > MAXOFFSET_T) {
		return (0);
	}

	if ((uio->uio_loffset < (offset_t)0) ||
		((uio->uio_loffset + uio->uio_resid) < 0)) {
		return (EINVAL);
	}
	if (uio->uio_resid == 0) {
		return (0);
	}

	vp = ITOV(ip);
	udf_vfsp = ip->i_udf;
	mutex_enter(&ip->i_tlock);
	ip->i_flag |= IACC;
	mutex_exit(&ip->i_tlock);

	rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);

	do {
		offset_t diff;
		u_offset_t uoff = uio->uio_loffset;
		/* off: segmap window base; mapon: offset within window */
		off = uoff & (offset_t)MAXBMASK;
		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
		on = (int)blkoff(udf_vfsp, uoff);
		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);

		diff = ip->i_size - uoff;

		if (diff <= (offset_t)0) {
			/* at or past EOF */
			error = 0;
			goto out;
		}
		if (diff < (offset_t)n) {
			n = (int)diff;
		}
		/* free-behind candidate: sequential read of a large file */
		dofree = ud_freebehind &&
			ip->i_nextr == (off & PAGEMASK) &&
			off > ud_smallfile;

#ifndef	__lock_lint
		/* drop the reader lock across the segmap fault */
		if (rwtype == RW_READER) {
			rw_exit(&ip->i_contents);
		}
#endif

		base = segmap_getmapflt(segkmap, vp, (off + mapon),
			(uint32_t)n, 1, S_READ);
		error = uiomove(base + mapon, (long)n, UIO_READ, uio);

		flags = 0;
		if (!error) {
			/*
			 * If read a whole block, or read to eof,
			 * won't need this buffer again soon.
			 */
			if (n + on == MAXBSIZE && ud_freebehind && dofree &&
				freemem < lotsfree + pages_before_pager) {
				flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
			}
			/*
			 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
			 * we want to make sure that the page which has
			 * been read, is written on disk if it is dirty.
			 * And corresponding indirect blocks should also
			 * be flushed out.
			 */
			if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
				flags &= ~SM_ASYNC;
				flags |= SM_WRITE;
			}
			error = segmap_release(segkmap, base, flags);
		} else {
			(void) segmap_release(segkmap, base, flags);
		}

#ifndef	__lock_lint
		if (rwtype == RW_READER) {
			rw_enter(&ip->i_contents, rwtype);
		}
#endif
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
out:
	/*
	 * Inode is updated according to this table if FRSYNC is set.
	 *
	 *	FSYNC	FDSYNC(posix.4)
	 *	--------------------------
	 *	always	IATTCHG|IBDWRITE
	 */
	if (ioflag & FRSYNC) {
		if ((ioflag & FSYNC) ||
			((ioflag & FDSYNC) &&
			(ip->i_flag & (IATTCHG|IBDWRITE)))) {
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
			ud_iupdat(ip, 1);
		}
	}
	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
	 */
	if (oresid != uio->uio_resid) {
		error = 0;
	}
	ITIMES(ip);

	return (error);
}
3136
/*
 * Write data from the uio into the inode, one logical block at a time,
 * through the segkmap window.  Caller holds i_contents as writer; the
 * lock is dropped around the segmap fault/uiomove and re-acquired
 * afterwards.  Handles file extension (allocating blocks with
 * ud_bmap_write and rolling i_size back on failure), RLIMIT_FSIZE
 * enforcement, page-creation optimizations for full-window writes,
 * clearing of setuid/setgid on unprivileged writes, and synchronous
 * semantics for FSYNC/FDSYNC per the table at "out:".  Returns 0 or
 * an errno; a partial write that moved any data returns 0.
 */
int32_t
ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
{
	caddr_t base;
	struct vnode *vp;
	struct udf_vfs *udf_vfsp;
	uint32_t flags;
	int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
	int32_t pagecreate, newpage;
	uint64_t old_i_size;
	u_offset_t off;
	long start_resid = uio->uio_resid, premove_resid;
	rlim64_t limit = uio->uio_limit;


	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	if ((ip->i_type != VREG) &&
		(ip->i_type != VDIR) &&
		(ip->i_type != VLNK)) {
		return (EIO);
	}

	if (uio->uio_loffset >= MAXOFFSET_T) {
		return (EFBIG);
	}
	/*
	 * see udf_l_pathconf
	 * UDF files are limited to 2^40 - 1 bytes.
	 */
	if (limit > (((uint64_t)1 << 40) - 1)) {
		limit = ((uint64_t)1 << 40) - 1;
	}
	if (uio->uio_loffset >= limit) {
		/* over the resource limit: deliver SIGXFSZ via rctl */
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}
	if ((uio->uio_loffset < (offset_t)0) ||
		((uio->uio_loffset + uio->uio_resid) < 0)) {
		return (EINVAL);
	}
	if (uio->uio_resid == 0) {
		return (0);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_flag |= INOACC;

	/*
	 * NOTE(review): iupdat_flag is only initialized on this path,
	 * but it is only read later under ISYNC (set here) or after
	 * being assigned in the loop, so the use appears safe.
	 */
	if (ioflag & (FSYNC | FDSYNC)) {
		ip->i_flag |= ISYNC;
		iupdat_flag = 1;
	}
	mutex_exit(&ip->i_tlock);

	udf_vfsp = ip->i_udf;
	vp = ITOV(ip);

	do {
		u_offset_t uoff = uio->uio_loffset;
		/* off: segmap window base; mapon: offset within window */
		off = uoff & (offset_t)MAXBMASK;
		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
		on = (int)blkoff(udf_vfsp, uoff);
		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);

		if (ip->i_type == VREG && uoff + n >= limit) {
			if (uoff >= limit) {
				error = EFBIG;
				goto out;
			}
			/* clip the transfer to the limit */
			n = (int)(limit - (rlim64_t)uoff);
		}
		if (uoff + n > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			error = ud_bmap_write(ip, uoff,
				(int)(on + n), mapon == 0, cr);
			if (error) {
				break;
			}
			i_size_changed = 1;
			old_i_size = ip->i_size;
			ip->i_size = uoff + n;
			/*
			 * If we are writing from the beginning of
			 * the mapping, we can just create the
			 * pages without having to read them.
			 */
			pagecreate = (mapon == 0);
		} else if (n == MAXBSIZE) {
			/*
			 * Going to do a whole mappings worth,
			 * so we can just create the pages w/o
			 * having to read them in.  But before
			 * we do that, we need to make sure any
			 * needed blocks are allocated first.
			 */
			error = ud_bmap_write(ip, uoff,
				(int)(on + n), 1, cr);
			if (error) {
				break;
			}
			pagecreate = 1;
		} else {
			pagecreate = 0;
		}

		rw_exit(&ip->i_contents);

		/*
		 * Touch the page and fault it in if it is not in
		 * core before segmap_getmapflt can lock it. This
		 * is to avoid the deadlock if the buffer is mapped
		 * to the same file through mmap which we want to
		 * write to.
		 */
		uio_prefaultpages((long)n, uio);

		base = segmap_getmapflt(segkmap, vp, (off + mapon),
			(uint32_t)n, !pagecreate, S_WRITE);

		/*
		 * segmap_pagecreate() returns 1 if it calls
		 * page_create_va() to allocate any pages.
		 */
		newpage = 0;
		if (pagecreate) {
			newpage = segmap_pagecreate(segkmap, base,
				(size_t)n, 0);
		}

		premove_resid = uio->uio_resid;
		error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);

		if (pagecreate &&
			uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This happens on most EOF write cases and if
			 * we had some sort of error during the uiomove.
			 */
			int nzero, nmoved;

			nmoved = (int)(uio->uio_loffset - (off + mapon));
			ASSERT(nmoved >= 0 && nmoved <= n);
			nzero = roundup(on + n, PAGESIZE) - nmoved;
			ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
			(void) kzero(base + mapon + nmoved, (uint32_t)nzero);
		}

		/*
		 * Unlock the pages allocated by page_create_va()
		 * in segmap_pagecreate()
		 */
		if (newpage) {
			segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
		}

		if (error) {
			/*
			 * If we failed on a write, we may have already
			 * allocated file blocks as well as pages.  It's
			 * hard to undo the block allocation, but we must
			 * be sure to invalidate any pages that may have
			 * been allocated.
			 */
			(void) segmap_release(segkmap, base, SM_INVAL);
		} else {
			flags = 0;
			/*
			 * Force write back for synchronous write cases.
			 */
			if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
				/*
				 * If the sticky bit is set but the
				 * execute bit is not set, we do a
				 * synchronous write back and free
				 * the page when done.  We set up swap
				 * files to be handled this way to
				 * prevent servers from keeping around
				 * the client's swap pages too long.
				 * XXX - there ought to be a better way.
				 */
				if (IS_SWAPVP(vp)) {
					flags = SM_WRITE | SM_FREE |
						SM_DONTNEED;
					iupdat_flag = 0;
				} else {
					flags = SM_WRITE;
				}
			} else if (((mapon + n) == MAXBSIZE) ||
				IS_SWAPVP(vp)) {
				/*
				 * Have written a whole block.
				 * Start an asynchronous write and
				 * mark the buffer to indicate that
				 * it won't be needed again soon.
				 */
				flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
			}
			error = segmap_release(segkmap, base, flags);

			/*
			 * If the operation failed and is synchronous,
			 * then we need to unwind what uiomove() last
			 * did so we can potentially return an error to
			 * the caller.  If this write operation was
			 * done in two pieces and the first succeeded,
			 * then we won't return an error for the second
			 * piece that failed.  However, we only want to
			 * return a resid value that reflects what was
			 * really done.
			 *
			 * Failures for non-synchronous operations can
			 * be ignored since the page subsystem will
			 * retry the operation until it succeeds or the
			 * file system is unmounted.
			 */
			if (error) {
				if ((ioflag & (FSYNC | FDSYNC)) ||
					ip->i_type == VDIR) {
					uio->uio_resid = premove_resid;
				} else {
					error = 0;
				}
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&ip->i_contents, RW_WRITER);
		/*
		 * If the uiomove() failed or if a synchronous
		 * page push failed, fix up i_size.
		 */
		if (error) {
			if (i_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks,so get rid
				 * of them.
				 */
				(void) ud_itrunc(ip, old_i_size, 0, cr);
			}
		} else {
			/*
			 * XXX - Can this be out of the loop?
			 */
			ip->i_flag |= IUPD | ICHG;
			if (i_size_changed) {
				ip->i_flag |= IATTCHG;
			}
			if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
				(IEXEC >> 10))) != 0 &&
			    (ip->i_char & (ISUID | ISGID)) != 0 &&
			    secpolicy_vnode_setid_retain(cr,
			    (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set.  If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unuseable.
				 */
				ip->i_char &= ~(ISUID | ISGID);
			}
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);

out:
	/*
	 * Inode is updated according to this table -
	 *
	 *	FSYNC	FDSYNC(posix.4)
	 *	--------------------------
	 *	always@	IATTCHG|IBDWRITE
	 *
	 * @ -	If we are doing synchronous write the only time we should
	 *	not be sync'ing the ip here is if we have the stickyhack
	 *	activated, the file is marked with the sticky bit and
	 *	no exec bit, the file length has not been changed and
	 *	no new blocks have been allocated during this write.
	 */
	if ((ip->i_flag & ISYNC) != 0) {
		/*
		 * we have eliminated nosync
		 */
		if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
			((ioflag & FSYNC) && iupdat_flag)) {
			ud_iupdat(ip, 1);
		}
	}

	/*
	 * If we've already done a partial-write, terminate
	 * the write but return no error.
	 */
	if (start_resid != uio->uio_resid) {
		error = 0;
	}
	ip->i_flag &= ~(INOACC | ISYNC);
	ITIMES_NOLOCK(ip);

	return (error);
}
3451
/*
 * Perform the I/O described by "bp" in pieces when the page(s) span
 * several discontiguous extents on disk.  The extent list for
 * [start, start + bp->b_bcount) is walked twice: first to count the
 * non-hole extents (holes on reads are simply zeroed in the page),
 * then to clone one slave buf per extent with bioclone().  A master
 * record (mio_master_t) tracks the outstanding byte count; the last
 * slave to complete (in ud_slave_done) finishes the original buf and
 * frees the master.  Returns 0 once all slave I/Os are issued, or an
 * errno (with B_ERROR set on bp) if extent lookup fails.
 */
int32_t
ud_multi_strat(struct ud_inode *ip,
	page_t *pp, struct buf *bp, u_offset_t start)
{
	daddr_t bn;
	int32_t error = 0, io_count, contig, alloc_sz, i;
	uint32_t io_off;
	mio_master_t *mm = NULL;
	mio_slave_t *ms = NULL;
	struct buf *rbp;

	ASSERT(!(start & PAGEOFFSET));

	/*
	 * Figure out how many buffers to allocate
	 */
	io_count = 0;
	for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
		contig = 0;
		if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
			&bn, &contig)) {
			goto end;
		}
		if (contig == 0) {
			goto end;
		}
		contig = MIN(contig, PAGESIZE - io_off);
		if (bn != UDF_HOLE) {
			io_count ++;
		} else {
			/*
			 * HOLE
			 */
			if (bp->b_flags & B_READ) {

				/*
				 * This is a hole and is read
				 * it should be filled with 0's
				 */
				pagezero(pp, io_off, contig);
			}
		}
	}


	if (io_count != 0) {

		/*
		 * Allocate memory for all the
		 * required number of buffers
		 */
		alloc_sz = sizeof (mio_master_t) +
			(sizeof (mio_slave_t) * io_count);
		mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
		/*
		 * NOTE(review): kmem_zalloc with KM_SLEEP does not
		 * return NULL, so this check looks like dead code;
		 * kept for safety.
		 */
		if (mm == NULL) {
			error = ENOMEM;
			goto end;
		}

		/*
		 * initialize master
		 */
		mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
		mm->mm_size = alloc_sz;
		mm->mm_bp = bp;
		mm->mm_resid = 0;
		mm->mm_error = 0;
		mm->mm_index = master_index++;

		/* slave array immediately follows the master record */
		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));

		/*
		 * Initialize buffers
		 */
		io_count = 0;
		for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
			contig = 0;
			if (error = ud_bmap_read(ip,
				(u_offset_t)(start + io_off),
				&bn, &contig)) {
				goto end;
			}
			ASSERT(contig);
			if ((io_off + contig) > bp->b_bcount) {
				contig = bp->b_bcount - io_off;
			}
			if (bn != UDF_HOLE) {
				/*
				 * Clone the buffer
				 * and prepare to start I/O
				 */
				ms->ms_ptr = mm;
				bioinit(&ms->ms_buf);
				rbp = bioclone(bp, io_off, (size_t)contig,
					bp->b_edev, bn, ud_slave_done,
					&ms->ms_buf, KM_NOSLEEP);
				ASSERT(rbp == &ms->ms_buf);
				mm->mm_resid += contig;
				io_count++;
				ms ++;
			}
		}

		/*
		 * Start I/O's
		 */
		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
		for (i = 0; i < io_count; i++) {
			(void) bdev_strategy(&ms->ms_buf);
			ms ++;
		}
	}

end:
	if (error != 0) {
		/* fail the original buf; free master state if allocated */
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		if (mm != NULL) {
			mutex_destroy(&mm->mm_mutex);
			kmem_free(mm, mm->mm_size);
		}
	}
	return (error);
}
3576
/*
 * Completion routine for the slave bufs cloned in ud_multi_strat().
 * Folds the slave's error and byte count into the master record; the
 * slave that brings the outstanding byte count (mm_resid) to zero is
 * the last one, and it propagates any recorded error to the original
 * buf, calls biodone() on it, and frees the master structure.
 */
int32_t
ud_slave_done(struct buf *bp)
{
	mio_master_t *mm;
	int32_t resid;

	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT((bp->b_flags & B_DONE) == 0);

	/* the slave buf is the first member of mio_slave_t */
	mm = ((mio_slave_t *)bp)->ms_ptr;

	/*
	 * Propagate error and byte count info from slave struct to
	 * the master struct
	 */
	mutex_enter(&mm->mm_mutex);
	if (bp->b_flags & B_ERROR) {

		/*
		 * If multiple slave buffers get
		 * error we forget the old errors
		 * this is ok because we any way
		 * cannot return multiple errors
		 */
		mm->mm_error = bp->b_error;
	}
	mm->mm_resid -= bp->b_bcount;
	resid = mm->mm_resid;
	mutex_exit(&mm->mm_mutex);

	/*
	 * free up the resources allocated to cloned buffers.
	 */
	bp_mapout(bp);
	biofini(bp);

	if (resid == 0) {

		/*
		 * This is the last I/O operation
		 * clean up and return the original buffer
		 * (safe to touch mm without the mutex: no other
		 * slave can still reference it once resid hit 0)
		 */
		if (mm->mm_error) {
			mm->mm_bp->b_flags |= B_ERROR;
			mm->mm_bp->b_error = mm->mm_error;
		}
		biodone(mm->mm_bp);
		mutex_destroy(&mm->mm_mutex);
		kmem_free(mm, mm->mm_size);
	}
	return (0);
}