1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 #include <sys/param.h>
  33 #include <sys/t_lock.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/user.h>
  37 #include <sys/buf.h>
  38 #include <sys/stat.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vfs_opreg.h>
  41 #include <sys/dirent.h>
  42 #include <sys/vnode.h>
  43 #include <sys/proc.h>
  44 #include <sys/file.h>
  45 #include <sys/fcntl.h>
  46 #include <sys/uio.h>
  47 #include <sys/fs/pc_label.h>
  48 #include <sys/fs/pc_fs.h>
  49 #include <sys/fs/pc_dir.h>
  50 #include <sys/fs/pc_node.h>
  51 #include <sys/mman.h>
  52 #include <sys/pathname.h>
  53 #include <sys/vmsystm.h>
  54 #include <sys/cmn_err.h>
  55 #include <sys/debug.h>
  56 #include <sys/statvfs.h>
  57 #include <sys/unistd.h>
  58 #include <sys/kmem.h>
  59 #include <sys/conf.h>
  60 #include <sys/flock.h>
  61 #include <sys/policy.h>
  62 #include <sys/sdt.h>
  63 #include <sys/sunddi.h>
  64 #include <sys/types.h>
  65 #include <sys/errno.h>
  66 
  67 #include <vm/seg.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg_map.h>
  71 #include <vm/seg_vn.h>
  72 #include <vm/hat.h>
  73 #include <vm/as.h>
  74 #include <vm/seg_kmem.h>
  75 
  76 #include <fs/fs_subr.h>
  77 
  78 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
  79 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
  80         caller_context_t *ct);
  81 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
  82         caller_context_t *);
  83 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
  84         caller_context_t *);
  85 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
  86         caller_context_t *ct);
  87 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
  88         caller_context_t *);
  89 static int pcfs_access(struct vnode *, int, int, struct cred *,
  90         caller_context_t *ct);
  91 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
  92         struct pathname *, int, struct vnode *, struct cred *,
  93         caller_context_t *, int *, pathname_t *);
  94 static int pcfs_create(struct vnode *, char *, struct vattr *,
  95         enum vcexcl, int mode, struct vnode **, struct cred *, int,
  96         caller_context_t *, vsecattr_t *);
  97 static int pcfs_remove(struct vnode *, char *, struct cred *,
  98         caller_context_t *, int);
  99 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
 100         struct cred *, caller_context_t *, int);
 101 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
 102         struct cred *, caller_context_t *, int, vsecattr_t *);
 103 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
 104         caller_context_t *, int);
 105 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
 106         caller_context_t *, int);
 107 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
 108 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
 109 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
 110 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
 111         offset_t, cred_t *, caller_context_t *);
 112 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
 113         size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
 114         caller_context_t *);
 115 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
 116         page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
 117 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
 118         caller_context_t *);
 119 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
 120         uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 121 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
 122         size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 123 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
 124         size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
 125 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
 126         caller_context_t *);
 127 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
 128         caller_context_t *);
 129 
 130 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
 131         struct cred *);
 132 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
 133 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
 134 
 135 extern krwlock_t pcnodes_lock;
 136 
 137 #define lround(r)       (((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
 138 
 139 /*
 140  * vnode op vectors for files and directories.
 141  */
 142 struct vnodeops *pcfs_fvnodeops;
 143 struct vnodeops *pcfs_dvnodeops;
 144 
 145 const fs_operation_def_t pcfs_fvnodeops_template[] = {
 146         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 147         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 148         VOPNAME_READ,           { .vop_read = pcfs_read },
 149         VOPNAME_WRITE,          { .vop_write = pcfs_write },
 150         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 151         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 152         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 153         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 154         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 155         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 156         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 157         VOPNAME_SPACE,          { .vop_space = pcfs_space },
 158         VOPNAME_GETPAGE,        { .vop_getpage = pcfs_getpage },
 159         VOPNAME_PUTPAGE,        { .vop_putpage = pcfs_putpage },
 160         VOPNAME_MAP,            { .vop_map = pcfs_map },
 161         VOPNAME_ADDMAP,         { .vop_addmap = pcfs_addmap },
 162         VOPNAME_DELMAP,         { .vop_delmap = pcfs_delmap },
 163         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 164         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 165         NULL,                   NULL
 166 };
 167 
 168 const fs_operation_def_t pcfs_dvnodeops_template[] = {
 169         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 170         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 171         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 172         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 173         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 174         VOPNAME_LOOKUP,         { .vop_lookup = pcfs_lookup },
 175         VOPNAME_CREATE,         { .vop_create = pcfs_create },
 176         VOPNAME_REMOVE,         { .vop_remove = pcfs_remove },
 177         VOPNAME_RENAME,         { .vop_rename = pcfs_rename },
 178         VOPNAME_MKDIR,          { .vop_mkdir = pcfs_mkdir },
 179         VOPNAME_RMDIR,          { .vop_rmdir = pcfs_rmdir },
 180         VOPNAME_READDIR,        { .vop_readdir = pcfs_readdir },
 181         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 182         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 183         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 184         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 185         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 186         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 187         NULL,                   NULL
 188 };
 189 
 190 
 191 /*ARGSUSED*/
 192 static int
 193 pcfs_open(
 194         struct vnode **vpp,
 195         int flag,
 196         struct cred *cr,
 197         caller_context_t *ct)
 198 {
 199         return (0);
 200 }
 201 
 202 /*
 203  * files are sync'ed on close to keep floppy up to date
 204  */
 205 
 206 /*ARGSUSED*/
 207 static int
 208 pcfs_close(
 209         struct vnode *vp,
 210         int flag,
 211         int count,
 212         offset_t offset,
 213         struct cred *cr,
 214         caller_context_t *ct)
 215 {
 216         return (0);
 217 }
 218 
 219 /*ARGSUSED*/
 220 static int
 221 pcfs_read(
 222         struct vnode *vp,
 223         struct uio *uiop,
 224         int ioflag,
 225         struct cred *cr,
 226         struct caller_context *ct)
 227 {
 228         struct pcfs *fsp;
 229         struct pcnode *pcp;
 230         int error;
 231 
 232         fsp = VFSTOPCFS(vp->v_vfsp);
 233         if (error = pc_verify(fsp))
 234                 return (error);
 235         error = pc_lockfs(fsp, 0, 0);
 236         if (error)
 237                 return (error);
 238         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 239                 pc_unlockfs(fsp);
 240                 return (EIO);
 241         }
 242         error = rwpcp(pcp, uiop, UIO_READ, ioflag);
 243         if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
 244                 pc_mark_acc(fsp, pcp);
 245         }
 246         pc_unlockfs(fsp);
 247         if (error) {
 248                 PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
 249         }
 250         return (error);
 251 }
 252 
 253 /*ARGSUSED*/
 254 static int
 255 pcfs_write(
 256         struct vnode *vp,
 257         struct uio *uiop,
 258         int ioflag,
 259         struct cred *cr,
 260         struct caller_context *ct)
 261 {
 262         struct pcfs *fsp;
 263         struct pcnode *pcp;
 264         int error;
 265 
 266         fsp = VFSTOPCFS(vp->v_vfsp);
 267         if (error = pc_verify(fsp))
 268                 return (error);
 269         error = pc_lockfs(fsp, 0, 0);
 270         if (error)
 271                 return (error);
 272         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 273                 pc_unlockfs(fsp);
 274                 return (EIO);
 275         }
 276         if (ioflag & FAPPEND) {
 277                 /*
 278                  * in append mode start at end of file.
 279                  */
 280                 uiop->uio_loffset = pcp->pc_size;
 281         }
 282         error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
 283         pcp->pc_flags |= PC_MOD;
 284         pc_mark_mod(fsp, pcp);
 285         if (ioflag & (FSYNC|FDSYNC))
 286                 (void) pc_nodeupdate(pcp);
 287 
 288         pc_unlockfs(fsp);
 289         if (error) {
 290                 PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
 291         }
 292         return (error);
 293 }
 294 
 295 /*
 296  * read or write a vnode
 297  */
 298 static int
 299 rwpcp(
 300         struct pcnode *pcp,
 301         struct uio *uio,
 302         enum uio_rw rw,
 303         int ioflag)
 304 {
 305         struct vnode *vp = PCTOV(pcp);
 306         struct pcfs *fsp;
 307         daddr_t bn;                     /* phys block number */
 308         int n;
 309         offset_t off;
 310         caddr_t base;
 311         int mapon, pagecreate;
 312         int newpage;
 313         int error = 0;
 314         rlim64_t limit = uio->uio_llimit;
 315         int oresid = uio->uio_resid;
 316 
 317         /*
 318          * If the filesystem was umounted by force, return immediately.
 319          */
 320         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 321                 return (EIO);
 322 
 323         PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
 324             uio->uio_loffset, uio->uio_resid, pcp->pc_size);
 325 
 326         ASSERT(rw == UIO_READ || rw == UIO_WRITE);
 327         ASSERT(vp->v_type == VREG);
 328 
 329         if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
 330                 return (0);
 331         }
 332 
 333         if (uio->uio_loffset < 0)
 334                 return (EINVAL);
 335 
 336         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 337                 limit = MAXOFFSET_T;
 338 
 339         if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
 340                 proc_t *p = ttoproc(curthread);
 341 
 342                 mutex_enter(&p->p_lock);
 343                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
 344                     p, RCA_UNSAFE_SIGINFO);
 345                 mutex_exit(&p->p_lock);
 346                 return (EFBIG);
 347         }
 348 
 349         /* the following condition will occur only for write */
 350 
 351         if (uio->uio_loffset >= UINT32_MAX)
 352                 return (EFBIG);
 353 
 354         if (uio->uio_resid == 0)
 355                 return (0);
 356 
 357         if (limit > UINT32_MAX)
 358                 limit = UINT32_MAX;
 359 
 360         fsp = VFSTOPCFS(vp->v_vfsp);
 361         if (fsp->pcfs_flags & PCFS_IRRECOV)
 362                 return (EIO);
 363 
 364         do {
 365                 /*
 366                  * Assignments to "n" in this block may appear
 367                  * to overflow in some cases.  However, after careful
 368                  * analysis it was determined that all assignments to
 369                  * "n" serve only to make "n" smaller.  Since "n"
 370                  * starts out as no larger than MAXBSIZE, "int" is
 371                  * safe.
 372                  */
 373                 off = uio->uio_loffset & MAXBMASK;
 374                 mapon = (int)(uio->uio_loffset & MAXBOFFSET);
 375                 n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 376                 if (rw == UIO_READ) {
 377                         offset_t diff;
 378 
 379                         diff = pcp->pc_size - uio->uio_loffset;
 380                         if (diff <= 0)
 381                                 return (0);
 382                         if (diff < n)
 383                                 n = (int)diff;
 384                 }
 385                 /*
 386                  * Compare limit with the actual offset + n, not the
 387                  * rounded down offset "off" or we will overflow
 388                  * the maximum file size after all.
 389                  */
 390                 if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
 391                         if (uio->uio_loffset >= limit) {
 392                                 error = EFBIG;
 393                                 break;
 394                         }
 395                         n = (int)(limit - uio->uio_loffset);
 396                 }
 397 
 398                 /*
 399                  * Touch the page and fault it in if it is not in
 400                  * core before segmap_getmapflt can lock it. This
 401                  * is to avoid the deadlock if the buffer is mapped
 402                  * to the same file through mmap which we want to
 403                  * write to.
 404                  */
 405                 uio_prefaultpages((long)n, uio);
 406 
 407                 base = segmap_getmap(segkmap, vp, (u_offset_t)off);
 408                 pagecreate = 0;
 409                 newpage = 0;
 410                 if (rw == UIO_WRITE) {
 411                         /*
 412                          * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
 413                          * with one page at a time, instead of one MAXBSIZE
 414                          * at a time, so we can fully explore pagecreate
 415                          * optimization??
 416                          */
 417                         if (uio->uio_loffset + n > pcp->pc_size) {
 418                                 uint_t ncl, lcn;
 419 
 420                                 ncl = (uint_t)howmany((offset_t)pcp->pc_size,
 421                                     fsp->pcfs_clsize);
 422                                 if (uio->uio_loffset > pcp->pc_size &&
 423                                     ncl < (uint_t)howmany(uio->uio_loffset,
 424                                     fsp->pcfs_clsize)) {
 425                                         /*
 426                                          * Allocate and zerofill skipped
 427                                          * clusters. This may not be worth the
 428                                          * effort since a small lseek beyond
 429                                          * eof but still within the cluster
 430                                          * will not be zeroed out.
 431                                          */
 432                                         lcn = pc_lblkno(fsp, uio->uio_loffset);
 433                                         error = pc_balloc(pcp, (daddr_t)lcn,
 434                                             1, &bn);
 435                                         ncl = lcn + 1;
 436                                 }
 437                                 if (!error &&
 438                                     ncl < (uint_t)howmany(uio->uio_loffset + n,
 439                                     fsp->pcfs_clsize))
 440                                         /*
 441                                          * allocate clusters w/o zerofill
 442                                          */
 443                                         error = pc_balloc(pcp,
 444                                             (daddr_t)pc_lblkno(fsp,
 445                                             uio->uio_loffset + n - 1),
 446                                             0, &bn);
 447 
 448                                 pcp->pc_flags |= PC_CHG;
 449 
 450                                 if (error) {
 451                                         pc_cluster32_t ncl;
 452                                         int nerror;
 453 
 454                                         /*
 455                                          * figure out new file size from
 456                                          * cluster chain length. If this
 457                                          * is detected to loop, the chain
 458                                          * is corrupted and we'd better
 459                                          * keep our fingers off that file.
 460                                          */
 461                                         nerror = pc_fileclsize(fsp,
 462                                             pcp->pc_scluster, &ncl);
 463                                         if (nerror) {
 464                                                 PC_DPRINTF1(2,
 465                                                     "cluster chain "
 466                                                     "corruption, "
 467                                                     "scluster=%d\n",
 468                                                     pcp->pc_scluster);
 469                                                 pcp->pc_size = 0;
 470                                                 pcp->pc_flags |= PC_INVAL;
 471                                                 error = nerror;
 472                                                 (void) segmap_release(segkmap,
 473                                                     base, 0);
 474                                                 break;
 475                                         }
 476                                         pcp->pc_size = fsp->pcfs_clsize * ncl;
 477 
 478                                         if (error == ENOSPC &&
 479                                             (pcp->pc_size - uio->uio_loffset)
 480                                             > 0) {
 481                                                 PC_DPRINTF3(2, "rwpcp ENOSPC "
 482                                                     "off=%lld n=%d size=%d\n",
 483                                                     uio->uio_loffset,
 484                                                     n, pcp->pc_size);
 485                                                 n = (int)(pcp->pc_size -
 486                                                     uio->uio_loffset);
 487                                         } else {
 488                                                 PC_DPRINTF1(1,
 489                                                     "rwpcp error1=%d\n", error);
 490                                                 (void) segmap_release(segkmap,
 491                                                     base, 0);
 492                                                 break;
 493                                         }
 494                                 } else {
 495                                         pcp->pc_size =
 496                                             (uint_t)(uio->uio_loffset + n);
 497                                 }
 498                                 if (mapon == 0) {
 499                                         newpage = segmap_pagecreate(segkmap,
 500                                             base, (size_t)n, 0);
 501                                         pagecreate = 1;
 502                                 }
 503                         } else if (n == MAXBSIZE) {
 504                                 newpage = segmap_pagecreate(segkmap, base,
 505                                     (size_t)n, 0);
 506                                 pagecreate = 1;
 507                         }
 508                 }
 509                 error = uiomove(base + mapon, (size_t)n, rw, uio);
 510 
 511                 if (pagecreate && uio->uio_loffset <
 512                     roundup(off + mapon + n, PAGESIZE)) {
 513                         offset_t nzero, nmoved;
 514 
 515                         nmoved = uio->uio_loffset - (off + mapon);
 516                         nzero = roundup(mapon + n, PAGESIZE) - nmoved;
 517                         (void) kzero(base + mapon + nmoved, (size_t)nzero);
 518                 }
 519 
 520                 /*
 521                  * Unlock the pages which have been allocated by
 522                  * page_create_va() in segmap_pagecreate().
 523                  */
 524                 if (newpage) {
 525                         segmap_pageunlock(segkmap, base, (size_t)n,
 526                             rw == UIO_WRITE ? S_WRITE : S_READ);
 527                 }
 528 
 529                 if (error) {
 530                         PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
 531                         /*
 532                          * If we failed on a write, we may have already
 533                          * allocated file blocks as well as pages.  It's hard
 534                          * to undo the block allocation, but we must be sure
 535                          * to invalidate any pages that may have been
 536                          * allocated.
 537                          */
 538                         if (rw == UIO_WRITE)
 539                                 (void) segmap_release(segkmap, base, SM_INVAL);
 540                         else
 541                                 (void) segmap_release(segkmap, base, 0);
 542                 } else {
 543                         uint_t flags = 0;
 544 
 545                         if (rw == UIO_READ) {
 546                                 if (n + mapon == MAXBSIZE ||
 547                                     uio->uio_loffset == pcp->pc_size)
 548                                         flags = SM_DONTNEED;
 549                         } else if (ioflag & (FSYNC|FDSYNC)) {
 550                                 flags = SM_WRITE;
 551                         } else if (n + mapon == MAXBSIZE) {
 552                                 flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
 553                         }
 554                         error = segmap_release(segkmap, base, flags);
 555                 }
 556 
 557         } while (error == 0 && uio->uio_resid > 0 && n != 0);
 558 
 559         if (oresid != uio->uio_resid)
 560                 error = 0;
 561         return (error);
 562 }
 563 
 564 /*ARGSUSED*/
 565 static int
 566 pcfs_getattr(
 567         struct vnode *vp,
 568         struct vattr *vap,
 569         int flags,
 570         struct cred *cr,
 571         caller_context_t *ct)
 572 {
 573         struct pcnode *pcp;
 574         struct pcfs *fsp;
 575         int error;
 576         char attr;
 577         struct pctime atime;
 578         int64_t unixtime;
 579 
 580         PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
 581 
 582         fsp = VFSTOPCFS(vp->v_vfsp);
 583         error = pc_lockfs(fsp, 0, 0);
 584         if (error)
 585                 return (error);
 586 
 587         /*
 588          * Note that we don't check for "invalid node" (PC_INVAL) here
 589          * only in order to make stat() succeed. We allow no I/O on such
 590          * a node, but do allow to check for its existence.
 591          */
 592         if ((pcp = VTOPC(vp)) == NULL) {
 593                 pc_unlockfs(fsp);
 594                 return (EIO);
 595         }
 596         /*
 597          * Copy from pcnode.
 598          */
 599         vap->va_type = vp->v_type;
 600         attr = pcp->pc_entry.pcd_attr;
 601         if (PCA_IS_HIDDEN(fsp, attr))
 602                 vap->va_mode = 0;
 603         else if (attr & PCA_LABEL)
 604                 vap->va_mode = 0444;
 605         else if (attr & PCA_RDONLY)
 606                 vap->va_mode = 0555;
 607         else if (fsp->pcfs_flags & PCFS_BOOTPART) {
 608                 vap->va_mode = 0755;
 609         } else {
 610                 vap->va_mode = 0777;
 611         }
 612 
 613         if (attr & PCA_DIR)
 614                 vap->va_mode |= S_IFDIR;
 615         else
 616                 vap->va_mode |= S_IFREG;
 617         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 618                 vap->va_uid = 0;
 619                 vap->va_gid = 0;
 620         } else {
 621                 vap->va_uid = crgetuid(cr);
 622                 vap->va_gid = crgetgid(cr);
 623         }
 624         vap->va_fsid = vp->v_vfsp->vfs_dev;
 625         vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
 626             pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
 627             pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
 628         vap->va_nlink = 1;
 629         vap->va_size = (u_offset_t)pcp->pc_size;
 630         vap->va_rdev = 0;
 631         vap->va_nblocks =
 632             (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
 633         vap->va_blksize = fsp->pcfs_clsize;
 634 
 635         /*
 636          * FAT root directories have no timestamps. In order not to return
 637          * "time zero" (1/1/1970), we record the time of the mount and give
 638          * that. This breaks less expectations.
 639          */
 640         if (vp->v_flag & VROOT) {
 641                 vap->va_mtime = fsp->pcfs_mounttime;
 642                 vap->va_atime = fsp->pcfs_mounttime;
 643                 vap->va_ctime = fsp->pcfs_mounttime;
 644                 pc_unlockfs(fsp);
 645                 return (0);
 646         }
 647 
 648         pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
 649         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 650                 if (unixtime > INT32_MAX)
 651                         DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
 652                 unixtime = MIN(unixtime, INT32_MAX);
 653         } else if (unixtime > INT32_MAX &&
 654             get_udatamodel() == DATAMODEL_ILP32) {
 655                 pc_unlockfs(fsp);
 656                 DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
 657                 return (EOVERFLOW);
 658         }
 659 
 660         vap->va_mtime.tv_sec = (time_t)unixtime;
 661         vap->va_mtime.tv_nsec = 0;
 662 
 663         /*
 664          * FAT doesn't know about POSIX ctime.
 665          * Best approximation is to always set it to mtime.
 666          */
 667         vap->va_ctime = vap->va_mtime;
 668 
 669         /*
 670          * FAT only stores "last access date". If that's the
 671          * same as the date of last modification then the time
 672          * of last access is known. Otherwise, use midnight.
 673          */
 674         atime.pct_date = pcp->pc_entry.pcd_ladate;
 675         if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
 676                 atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
 677         else
 678                 atime.pct_time = 0;
 679         pc_pcttotv(&atime, &unixtime);
 680         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 681                 if (unixtime > INT32_MAX)
 682                         DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
 683                 unixtime = MIN(unixtime, INT32_MAX);
 684         } else if (unixtime > INT32_MAX &&
 685             get_udatamodel() == DATAMODEL_ILP32) {
 686                 pc_unlockfs(fsp);
 687                 DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
 688                 return (EOVERFLOW);
 689         }
 690 
 691         vap->va_atime.tv_sec = (time_t)unixtime;
 692         vap->va_atime.tv_nsec = 0;
 693 
 694         pc_unlockfs(fsp);
 695         return (0);
 696 }
 697 
 698 
 699 /*ARGSUSED*/
 700 static int
 701 pcfs_setattr(
 702         struct vnode *vp,
 703         struct vattr *vap,
 704         int flags,
 705         struct cred *cr,
 706         caller_context_t *ct)
 707 {
 708         struct pcnode *pcp;
 709         mode_t mask = vap->va_mask;
 710         int error;
 711         struct pcfs *fsp;
 712         timestruc_t now, *timep;
 713 
 714         PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
 715         /*
 716          * cannot set these attributes
 717          */
 718         if (mask & (AT_NOSET | AT_UID | AT_GID)) {
 719                 return (EINVAL);
 720         }
 721         /*
 722          * pcfs_setattr is now allowed on directories to avoid silly warnings
 723          * from 'tar' when it tries to set times on a directory, and console
 724          * printf's on the NFS server when it gets EINVAL back on such a
 725          * request. One possible problem with that since a directory entry
 726          * identifies a file, '.' and all the '..' entries in subdirectories
 727          * may get out of sync when the directory is updated since they're
 728          * treated like separate files. We could fix that by looking for
 729          * '.' and giving it the same attributes, and then looking for
 730          * all the subdirectories and updating '..', but that's pretty
 731          * expensive for something that doesn't seem likely to matter.
 732          */
 733         /* can't do some ops on directories anyway */
 734         if ((vp->v_type == VDIR) &&
 735             (mask & AT_SIZE)) {
 736                 return (EINVAL);
 737         }
 738 
 739         fsp = VFSTOPCFS(vp->v_vfsp);
 740         error = pc_lockfs(fsp, 0, 0);
 741         if (error)
 742                 return (error);
 743         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 744                 pc_unlockfs(fsp);
 745                 return (EIO);
 746         }
 747 
 748         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 749                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
 750                         pc_unlockfs(fsp);
 751                         return (EACCES);
 752                 }
 753         }
 754 
 755         /*
 756          * Change file access modes.
 757          * If nobody has write permission, file is marked readonly.
 758          * Otherwise file is writable by anyone.
 759          */
 760         if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
 761                 if ((vap->va_mode & 0222) == 0)
 762                         pcp->pc_entry.pcd_attr |= PCA_RDONLY;
 763                 else
 764                         pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
 765                 pcp->pc_flags |= PC_CHG;
 766         }
 767         /*
 768          * Truncate file. Must have write permission.
 769          */
 770         if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
 771                 if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
 772                         error = EACCES;
 773                         goto out;
 774                 }
 775                 if (vap->va_size > UINT32_MAX) {
 776                         error = EFBIG;
 777                         goto out;
 778                 }
 779                 error = pc_truncate(pcp, (uint_t)vap->va_size);
 780 
 781                 if (error)
 782                         goto out;
 783 
 784                 if (vap->va_size == 0) {
 785                         vnevent_truncate(vp, ct);
 786                 } else {
 787                         vnevent_resize(vp, ct);
 788                 }
 789         }
 790         /*
 791          * Change file modified times.
 792          */
 793         if (mask & (AT_MTIME | AT_CTIME)) {
 794                 /*
 795                  * If SysV-compatible option to set access and
 796                  * modified times if privileged, owner, or write access,
 797                  * use current time rather than va_mtime.
 798                  *
 799                  * XXX - va_mtime.tv_sec == -1 flags this.
 800                  */
 801                 timep = &vap->va_mtime;
 802                 if (vap->va_mtime.tv_sec == -1) {
 803                         gethrestime(&now);
 804                         timep = &now;
 805                 }
 806                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 807                     timep->tv_sec > INT32_MAX) {
 808                         error = EOVERFLOW;
 809                         goto out;
 810                 }
 811                 error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
 812                 if (error)
 813                         goto out;
 814                 pcp->pc_flags |= PC_CHG;
 815         }
 816         /*
 817          * Change file access times.
 818          */
 819         if (mask & AT_ATIME) {
 820                 /*
 821                  * If SysV-compatible option to set access and
 822                  * modified times if privileged, owner, or write access,
 823                  * use current time rather than va_mtime.
 824                  *
 825                  * XXX - va_atime.tv_sec == -1 flags this.
 826                  */
 827                 struct pctime   atime;
 828 
 829                 timep = &vap->va_atime;
 830                 if (vap->va_atime.tv_sec == -1) {
 831                         gethrestime(&now);
 832                         timep = &now;
 833                 }
 834                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 835                     timep->tv_sec > INT32_MAX) {
 836                         error = EOVERFLOW;
 837                         goto out;
 838                 }
 839                 error = pc_tvtopct(timep, &atime);
 840                 if (error)
 841                         goto out;
 842                 pcp->pc_entry.pcd_ladate = atime.pct_date;
 843                 pcp->pc_flags |= PC_CHG;
 844         }
 845 out:
 846         pc_unlockfs(fsp);
 847         return (error);
 848 }
 849 
 850 
 851 /*ARGSUSED*/
 852 static int
 853 pcfs_access(
 854         struct vnode *vp,
 855         int mode,
 856         int flags,
 857         struct cred *cr,
 858         caller_context_t *ct)
 859 {
 860         struct pcnode *pcp;
 861         struct pcfs *fsp;
 862 
 863 
 864         fsp = VFSTOPCFS(vp->v_vfsp);
 865 
 866         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
 867                 return (EIO);
 868         if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
 869                 return (EACCES);
 870 
 871         /*
 872          * If this is a boot partition, privileged users have full access while
 873          * others have read-only access.
 874          */
 875         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 876                 if ((mode & VWRITE) &&
 877                     secpolicy_pcfs_modify_bootpartition(cr) != 0)
 878                         return (EACCES);
 879         }
 880         return (0);
 881 }
 882 
 883 
 884 /*ARGSUSED*/
 885 static int
 886 pcfs_fsync(
 887         struct vnode *vp,
 888         int syncflag,
 889         struct cred *cr,
 890         caller_context_t *ct)
 891 {
 892         struct pcfs *fsp;
 893         struct pcnode *pcp;
 894         int error;
 895 
 896         fsp = VFSTOPCFS(vp->v_vfsp);
 897         if (error = pc_verify(fsp))
 898                 return (error);
 899         error = pc_lockfs(fsp, 0, 0);
 900         if (error)
 901                 return (error);
 902         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 903                 pc_unlockfs(fsp);
 904                 return (EIO);
 905         }
 906         rw_enter(&pcnodes_lock, RW_WRITER);
 907         error = pc_nodesync(pcp);
 908         rw_exit(&pcnodes_lock);
 909         pc_unlockfs(fsp);
 910         return (error);
 911 }
 912 
 913 
 914 /*ARGSUSED*/
 915 static void
 916 pcfs_inactive(
 917         struct vnode *vp,
 918         struct cred *cr,
 919         caller_context_t *ct)
 920 {
 921         struct pcnode *pcp;
 922         struct pcfs *fsp;
 923         int error;
 924 
 925         fsp = VFSTOPCFS(vp->v_vfsp);
 926         error = pc_lockfs(fsp, 0, 1);
 927 
 928         /*
 929          * If the filesystem was umounted by force, all dirty
 930          * pages associated with this vnode are invalidated
 931          * and then the vnode will be freed.
 932          */
 933         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
 934                 pcp = VTOPC(vp);
 935                 if (vn_has_cached_data(vp)) {
 936                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 937                             pcfs_putapage, B_INVAL, (struct cred *)NULL);
 938                 }
 939                 remque(pcp);
 940                 if (error == 0)
 941                         pc_unlockfs(fsp);
 942                 vn_free(vp);
 943                 kmem_free(pcp, sizeof (struct pcnode));
 944                 VFS_RELE(PCFSTOVFS(fsp));
 945                 return;
 946         }
 947 
 948         mutex_enter(&vp->v_lock);
 949         ASSERT(vp->v_count >= 1);
 950         if (vp->v_count > 1) {
 951                 vp->v_count--;  /* release our hold from vn_rele */
 952                 mutex_exit(&vp->v_lock);
 953                 pc_unlockfs(fsp);
 954                 return;
 955         }
 956         mutex_exit(&vp->v_lock);
 957 
 958         /*
 959          * Check again to confirm that no intervening I/O error
 960          * with a subsequent pc_diskchanged() call has released
 961          * the pcnode. If it has then release the vnode as above.
 962          */
 963         pcp = VTOPC(vp);
 964         if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
 965                 if (vn_has_cached_data(vp))
 966                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 967                             pcfs_putapage, B_INVAL | B_TRUNC,
 968                             (struct cred *)NULL);
 969         }
 970 
 971         if (pcp == NULL) {
 972                 vn_free(vp);
 973         } else {
 974                 pc_rele(pcp);
 975         }
 976 
 977         if (!error)
 978                 pc_unlockfs(fsp);
 979 }
 980 
 981 /*ARGSUSED*/
 982 static int
 983 pcfs_lookup(
 984         struct vnode *dvp,
 985         char *nm,
 986         struct vnode **vpp,
 987         struct pathname *pnp,
 988         int flags,
 989         struct vnode *rdir,
 990         struct cred *cr,
 991         caller_context_t *ct,
 992         int *direntflags,
 993         pathname_t *realpnp)
 994 {
 995         struct pcfs *fsp;
 996         struct pcnode *pcp;
 997         int error;
 998 
 999         /*
1000          * If the filesystem was umounted by force, return immediately.
1001          */
1002         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1003                 return (EIO);
1004 
1005         /*
1006          * verify that the dvp is still valid on the disk
1007          */
1008         fsp = VFSTOPCFS(dvp->v_vfsp);
1009         if (error = pc_verify(fsp))
1010                 return (error);
1011         error = pc_lockfs(fsp, 0, 0);
1012         if (error)
1013                 return (error);
1014         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1015                 pc_unlockfs(fsp);
1016                 return (EIO);
1017         }
1018         /*
1019          * Null component name is a synonym for directory being searched.
1020          */
1021         if (*nm == '\0') {
1022                 VN_HOLD(dvp);
1023                 *vpp = dvp;
1024                 pc_unlockfs(fsp);
1025                 return (0);
1026         }
1027 
1028         error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1029         if (!error) {
1030                 *vpp = PCTOV(pcp);
1031                 pcp->pc_flags |= PC_EXTERNAL;
1032         }
1033         pc_unlockfs(fsp);
1034         return (error);
1035 }
1036 
1037 
1038 /*ARGSUSED*/
1039 static int
1040 pcfs_create(
1041         struct vnode *dvp,
1042         char *nm,
1043         struct vattr *vap,
1044         enum vcexcl exclusive,
1045         int mode,
1046         struct vnode **vpp,
1047         struct cred *cr,
1048         int flag,
1049         caller_context_t *ct,
1050         vsecattr_t *vsecp)
1051 {
1052         int error;
1053         struct pcnode *pcp;
1054         struct vnode *vp;
1055         struct pcfs *fsp;
1056 
1057         /*
1058          * can't create directories. use pcfs_mkdir.
1059          * can't create anything other than files.
1060          */
1061         if (vap->va_type == VDIR)
1062                 return (EISDIR);
1063         else if (vap->va_type != VREG)
1064                 return (EINVAL);
1065 
1066         pcp = NULL;
1067         fsp = VFSTOPCFS(dvp->v_vfsp);
1068         error = pc_lockfs(fsp, 0, 0);
1069         if (error)
1070                 return (error);
1071         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1072                 pc_unlockfs(fsp);
1073                 return (EIO);
1074         }
1075 
1076         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1077                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1078                         pc_unlockfs(fsp);
1079                         return (EACCES);
1080                 }
1081         }
1082 
1083         if (*nm == '\0') {
1084                 /*
1085                  * Null component name refers to the directory itself.
1086                  */
1087                 VN_HOLD(dvp);
1088                 pcp = VTOPC(dvp);
1089                 error = EEXIST;
1090         } else {
1091                 error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1092         }
1093         /*
1094          * if file exists and this is a nonexclusive create,
1095          * check for access permissions
1096          */
1097         if (error == EEXIST) {
1098                 vp = PCTOV(pcp);
1099                 if (exclusive == NONEXCL) {
1100                         if (vp->v_type == VDIR) {
1101                                 error = EISDIR;
1102                         } else if (mode) {
1103                                 error = pcfs_access(PCTOV(pcp), mode, 0,
1104                                     cr, ct);
1105                         } else {
1106                                 error = 0;
1107                         }
1108                 }
1109                 if (error) {
1110                         VN_RELE(PCTOV(pcp));
1111                 } else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1112                     (vap->va_size == 0)) {
1113                         error = pc_truncate(pcp, 0L);
1114                         if (error) {
1115                                 VN_RELE(PCTOV(pcp));
1116                         } else {
1117                                 vnevent_create(PCTOV(pcp), ct);
1118                         }
1119                 }
1120         }
1121         if (error) {
1122                 pc_unlockfs(fsp);
1123                 return (error);
1124         }
1125         *vpp = PCTOV(pcp);
1126         pcp->pc_flags |= PC_EXTERNAL;
1127         pc_unlockfs(fsp);
1128         return (error);
1129 }
1130 
1131 /*ARGSUSED*/
1132 static int
1133 pcfs_remove(
1134         struct vnode *vp,
1135         char *nm,
1136         struct cred *cr,
1137         caller_context_t *ct,
1138         int flags)
1139 {
1140         struct pcfs *fsp;
1141         struct pcnode *pcp;
1142         int error;
1143 
1144         fsp = VFSTOPCFS(vp->v_vfsp);
1145         if (error = pc_verify(fsp))
1146                 return (error);
1147         error = pc_lockfs(fsp, 0, 0);
1148         if (error)
1149                 return (error);
1150         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1151                 pc_unlockfs(fsp);
1152                 return (EIO);
1153         }
1154         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1155                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1156                         pc_unlockfs(fsp);
1157                         return (EACCES);
1158                 }
1159         }
1160         error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1161         pc_unlockfs(fsp);
1162         return (error);
1163 }
1164 
1165 /*
1166  * Rename a file or directory
1167  * This rename is restricted to only rename files within a directory.
1168  * XX should make rename more general
1169  */
1170 /*ARGSUSED*/
1171 static int
1172 pcfs_rename(
1173         struct vnode *sdvp,             /* old (source) parent vnode */
1174         char *snm,                      /* old (source) entry name */
1175         struct vnode *tdvp,             /* new (target) parent vnode */
1176         char *tnm,                      /* new (target) entry name */
1177         struct cred *cr,
1178         caller_context_t *ct,
1179         int flags)
1180 {
1181         struct pcfs *fsp;
1182         struct pcnode *dp;      /* parent pcnode */
1183         struct pcnode *tdp;
1184         int error;
1185 
1186         fsp = VFSTOPCFS(sdvp->v_vfsp);
1187         if (error = pc_verify(fsp))
1188                 return (error);
1189 
1190         /*
1191          * make sure we can muck with this directory.
1192          */
1193         error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1194         if (error) {
1195                 return (error);
1196         }
1197         error = pc_lockfs(fsp, 0, 0);
1198         if (error)
1199                 return (error);
1200         if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1201             (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1202                 pc_unlockfs(fsp);
1203                 return (EIO);
1204         }
1205         error = pc_rename(dp, tdp, snm, tnm, ct);
1206         pc_unlockfs(fsp);
1207         return (error);
1208 }
1209 
1210 /*ARGSUSED*/
1211 static int
1212 pcfs_mkdir(
1213         struct vnode *dvp,
1214         char *nm,
1215         struct vattr *vap,
1216         struct vnode **vpp,
1217         struct cred *cr,
1218         caller_context_t *ct,
1219         int flags,
1220         vsecattr_t *vsecp)
1221 {
1222         struct pcfs *fsp;
1223         struct pcnode *pcp;
1224         int error;
1225 
1226         fsp = VFSTOPCFS(dvp->v_vfsp);
1227         if (error = pc_verify(fsp))
1228                 return (error);
1229         error = pc_lockfs(fsp, 0, 0);
1230         if (error)
1231                 return (error);
1232         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1233                 pc_unlockfs(fsp);
1234                 return (EIO);
1235         }
1236 
1237         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1238                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1239                         pc_unlockfs(fsp);
1240                         return (EACCES);
1241                 }
1242         }
1243 
1244         error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1245 
1246         if (!error) {
1247                 pcp -> pc_flags |= PC_EXTERNAL;
1248                 *vpp = PCTOV(pcp);
1249         } else if (error == EEXIST) {
1250                 VN_RELE(PCTOV(pcp));
1251         }
1252         pc_unlockfs(fsp);
1253         return (error);
1254 }
1255 
1256 /*ARGSUSED*/
1257 static int
1258 pcfs_rmdir(
1259         struct vnode *dvp,
1260         char *nm,
1261         struct vnode *cdir,
1262         struct cred *cr,
1263         caller_context_t *ct,
1264         int flags)
1265 {
1266         struct pcfs *fsp;
1267         struct pcnode *pcp;
1268         int error;
1269 
1270         fsp = VFSTOPCFS(dvp -> v_vfsp);
1271         if (error = pc_verify(fsp))
1272                 return (error);
1273         if (error = pc_lockfs(fsp, 0, 0))
1274                 return (error);
1275 
1276         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1277                 pc_unlockfs(fsp);
1278                 return (EIO);
1279         }
1280 
1281         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1282                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1283                         pc_unlockfs(fsp);
1284                         return (EACCES);
1285                 }
1286         }
1287 
1288         error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1289         pc_unlockfs(fsp);
1290         return (error);
1291 }
1292 
1293 /*
1294  * read entries in a directory.
1295  * we must convert pc format to unix format
1296  */
1297 
1298 /*ARGSUSED*/
1299 static int
1300 pcfs_readdir(
1301         struct vnode *dvp,
1302         struct uio *uiop,
1303         struct cred *cr,
1304         int *eofp,
1305         caller_context_t *ct,
1306         int flags)
1307 {
1308         struct pcnode *pcp;
1309         struct pcfs *fsp;
1310         struct pcdir *ep;
1311         struct buf *bp = NULL;
1312         offset_t offset;
1313         int boff;
1314         struct pc_dirent lbp;
1315         struct pc_dirent *ld = &lbp;
1316         int error;
1317 
1318         /*
1319          * If the filesystem was umounted by force, return immediately.
1320          */
1321         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1322                 return (EIO);
1323 
1324         if ((uiop->uio_iovcnt != 1) ||
1325             (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1326                 return (EINVAL);
1327         }
1328         fsp = VFSTOPCFS(dvp->v_vfsp);
1329         /*
1330          * verify that the dp is still valid on the disk
1331          */
1332         if (error = pc_verify(fsp)) {
1333                 return (error);
1334         }
1335         error = pc_lockfs(fsp, 0, 0);
1336         if (error)
1337                 return (error);
1338         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1339                 pc_unlockfs(fsp);
1340                 return (EIO);
1341         }
1342 
1343         bzero(ld, sizeof (*ld));
1344 
1345         if (eofp != NULL)
1346                 *eofp = 0;
1347         offset = uiop->uio_loffset;
1348 
1349         if (dvp->v_flag & VROOT) {
1350                 /*
1351                  * kludge up entries for "." and ".." in the root.
1352                  */
1353                 if (offset == 0) {
1354                         (void) strcpy(ld->d_name, ".");
1355                         ld->d_reclen = DIRENT64_RECLEN(1);
1356                         ld->d_off = (off64_t)sizeof (struct pcdir);
1357                         ld->d_ino = (ino64_t)UINT_MAX;
1358                         if (ld->d_reclen > uiop->uio_resid) {
1359                                 pc_unlockfs(fsp);
1360                                 return (ENOSPC);
1361                         }
1362                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1363                         uiop->uio_loffset = ld->d_off;
1364                         offset = uiop->uio_loffset;
1365                 }
1366                 if (offset == sizeof (struct pcdir)) {
1367                         (void) strcpy(ld->d_name, "..");
1368                         ld->d_reclen = DIRENT64_RECLEN(2);
1369                         if (ld->d_reclen > uiop->uio_resid) {
1370                                 pc_unlockfs(fsp);
1371                                 return (ENOSPC);
1372                         }
1373                         ld->d_off = (off64_t)(uiop->uio_loffset +
1374                             sizeof (struct pcdir));
1375                         ld->d_ino = (ino64_t)UINT_MAX;
1376                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1377                         uiop->uio_loffset = ld->d_off;
1378                         offset = uiop->uio_loffset;
1379                 }
1380                 offset -= 2 * sizeof (struct pcdir);
1381                 /* offset now has the real offset value into directory file */
1382         }
1383 
1384         for (;;) {
1385                 boff = pc_blkoff(fsp, offset);
1386                 if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1387                         if (bp != NULL) {
1388                                 brelse(bp);
1389                                 bp = NULL;
1390                         }
1391                         error = pc_blkatoff(pcp, offset, &bp, &ep);
1392                         if (error) {
1393                                 if (error == ENOENT) {
1394                                         error = 0;
1395                                         if (eofp)
1396                                                 *eofp = 1;
1397                                 }
1398                                 break;
1399                         }
1400                 }
1401                 if (ep->pcd_filename[0] == PCD_UNUSED) {
1402                         if (eofp)
1403                                 *eofp = 1;
1404                         break;
1405                 }
1406                 /*
1407                  * Don't display label because it may contain funny characters.
1408                  */
1409                 if (ep->pcd_filename[0] == PCD_ERASED) {
1410                         uiop->uio_loffset += sizeof (struct pcdir);
1411                         offset += sizeof (struct pcdir);
1412                         ep++;
1413                         continue;
1414                 }
1415                 if (PCDL_IS_LFN(ep)) {
1416                         if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1417                             0)
1418                                 break;
1419                         continue;
1420                 }
1421 
1422                 if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1423                         break;
1424         }
1425         if (bp)
1426                 brelse(bp);
1427         pc_unlockfs(fsp);
1428         return (error);
1429 }
1430 
1431 
1432 /*
1433  * Called from pvn_getpages to get a particular page.  When we are called
1434  * the pcfs is already locked.
1435  */
1436 /*ARGSUSED*/
1437 static int
1438 pcfs_getapage(
1439         struct vnode *vp,
1440         u_offset_t off,
1441         size_t len,
1442         uint_t *protp,
1443         page_t *pl[],           /* NULL if async IO is requested */
1444         size_t plsz,
1445         struct seg *seg,
1446         caddr_t addr,
1447         enum seg_rw rw,
1448         struct cred *cr)
1449 {
1450         struct pcnode *pcp;
1451         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1452         struct vnode *devvp;
1453         page_t *pp;
1454         page_t *pagefound;
1455         int err;
1456 
1457         /*
1458          * If the filesystem was umounted by force, return immediately.
1459          */
1460         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1461                 return (EIO);
1462 
1463         PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1464             (void *)vp, off, len);
1465 
1466         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1467                 return (EIO);
1468         devvp = fsp->pcfs_devvp;
1469 
1470         /* pcfs doesn't do readaheads */
1471         if (pl == NULL)
1472                 return (0);
1473 
1474         pl[0] = NULL;
1475         err = 0;
1476         /*
1477          * If the accessed time on the pcnode has not already been
1478          * set elsewhere (e.g. for read/setattr) we set the time now.
1479          * This gives us approximate modified times for mmap'ed files
1480          * which are accessed via loads in the user address space.
1481          */
1482         if ((pcp->pc_flags & PC_ACC) == 0 &&
1483             ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1484                 pc_mark_acc(fsp, pcp);
1485         }
1486 reread:
1487         if ((pagefound = page_exists(vp, off)) == NULL) {
1488                 /*
1489                  * Need to really do disk IO to get the page(s).
1490                  */
1491                 struct buf *bp;
1492                 daddr_t lbn, bn;
1493                 u_offset_t io_off;
1494                 size_t io_len;
1495                 u_offset_t lbnoff, xferoffset;
1496                 u_offset_t pgoff;
1497                 uint_t  xfersize;
1498                 int err1;
1499 
1500                 lbn = pc_lblkno(fsp, off);
1501                 lbnoff = off & ~(fsp->pcfs_clsize - 1);
1502                 xferoffset = off & ~(fsp->pcfs_secsize - 1);
1503 
1504                 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1505                     off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1506                 if (pp == NULL)
1507                         /*
1508                          * XXX - If pcfs is made MT-hot, this should go
1509                          * back to reread.
1510                          */
1511                         panic("pcfs_getapage pvn_read_kluster");
1512 
1513                 for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1514                     pgoff += xfersize,
1515                     lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1516                     lbnoff += xfersize, xferoffset += xfersize) {
1517                         /*
1518                          * read as many contiguous blocks as possible to
1519                          * fill this page
1520                          */
1521                         xfersize = PAGESIZE - pgoff;
1522                         err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1523                         if (err1) {
1524                                 PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1525                                 err = err1;
1526                                 goto out;
1527                         }
1528                         bp = pageio_setup(pp, xfersize, devvp, B_READ);
1529                         bp->b_edev = devvp->v_rdev;
1530                         bp->b_dev = cmpdev(devvp->v_rdev);
1531                         bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1532                         bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1533                         bp->b_file = vp;
1534                         bp->b_offset = (offset_t)(off + pgoff);
1535 
1536                         (void) bdev_strategy(bp);
1537 
1538                         lwp_stat_update(LWP_STAT_INBLK, 1);
1539 
1540                         if (err == 0)
1541                                 err = biowait(bp);
1542                         else
1543                                 (void) biowait(bp);
1544                         pageio_done(bp);
1545                         if (err)
1546                                 goto out;
1547                 }
1548                 if (pgoff < PAGESIZE) {
1549                         pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1550                 }
1551                 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1552         }
1553 out:
1554         if (err) {
1555                 if (pp != NULL)
1556                         pvn_read_done(pp, B_ERROR);
1557                 return (err);
1558         }
1559 
1560         if (pagefound) {
1561                 /*
1562                  * Page exists in the cache, acquire the "shared"
1563                  * lock.  If this fails, go back to reread.
1564                  */
1565                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1566                         goto reread;
1567                 }
1568                 pl[0] = pp;
1569                 pl[1] = NULL;
1570         }
1571         return (err);
1572 }
1573 
1574 /*
1575  * Return all the pages from [off..off+len] in given file
1576  */
1577 /* ARGSUSED */
1578 static int
1579 pcfs_getpage(
1580         struct vnode *vp,
1581         offset_t off,
1582         size_t len,
1583         uint_t *protp,
1584         page_t *pl[],
1585         size_t plsz,
1586         struct seg *seg,
1587         caddr_t addr,
1588         enum seg_rw rw,
1589         struct cred *cr,
1590         caller_context_t *ct)
1591 {
1592         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1593         int err;
1594 
1595         PC_DPRINTF0(6, "pcfs_getpage\n");
1596         if (err = pc_verify(fsp))
1597                 return (err);
1598         if (vp->v_flag & VNOMAP)
1599                 return (ENOSYS);
1600         ASSERT(off <= UINT32_MAX);
1601         err = pc_lockfs(fsp, 0, 0);
1602         if (err)
1603                 return (err);
1604         if (protp != NULL)
1605                 *protp = PROT_ALL;
1606 
1607         ASSERT((off & PAGEOFFSET) == 0);
1608         err = pvn_getpages(pcfs_getapage, vp, off, len, protp, pl, plsz,
1609             seg, addr, rw, cr);
1610 
1611         pc_unlockfs(fsp);
1612         return (err);
1613 }
1614 
1615 
1616 /*
1617  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1618  * If len == 0, do from off to EOF.
1619  *
1620  * The normal cases should be len == 0 & off == 0 (entire vp list),
1621  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1622  * (from pageout).
1623  *
1624  */
1625 /*ARGSUSED*/
1626 static int
1627 pcfs_putpage(
1628         struct vnode *vp,
1629         offset_t off,
1630         size_t len,
1631         int flags,
1632         struct cred *cr,
1633         caller_context_t *ct)
1634 {
1635         struct pcnode *pcp;
1636         page_t *pp;
1637         struct pcfs *fsp;
1638         u_offset_t io_off;
1639         size_t io_len;
1640         offset_t eoff;
1641         int err;
1642 
1643         /*
1644          * If the filesystem was umounted by force, return immediately.
1645          */
1646         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1647                 return (EIO);
1648 
1649         PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1650         if (vp->v_flag & VNOMAP)
1651                 return (ENOSYS);
1652 
1653         fsp = VFSTOPCFS(vp->v_vfsp);
1654 
1655         if (err = pc_verify(fsp))
1656                 return (err);
1657         if ((pcp = VTOPC(vp)) == NULL) {
1658                 PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1659                 return (EIO);
1660         }
1661         if (pcp->pc_flags & PC_INVAL)
1662                 return (EIO);
1663 
1664         if (curproc == proc_pageout) {
1665                 /*
1666                  * XXX - This is a quick hack to avoid blocking
1667                  * pageout. Also to avoid pcfs_getapage deadlocking
1668                  * with putpage when memory is running out,
1669                  * since we only have one global lock and we don't
1670                  * support async putpage.
1671                  * It should be fixed someday.
1672                  *
1673                  * Interestingly, this used to be a test of NOMEMWAIT().
1674                  * We only ever got here once pcfs started supporting
1675                  * NFS sharing, and then only because the NFS server
1676                  * threads seem to do writes in sched's process context.
1677                  * Since everyone else seems to just care about pageout,
1678                  * the test was changed to look for pageout directly.
1679                  */
1680                 return (ENOMEM);
1681         }
1682 
1683         ASSERT(off <= UINT32_MAX);
1684 
1685         flags &= ~B_ASYNC;  /* XXX should fix this later */
1686 
1687         err = pc_lockfs(fsp, 0, 0);
1688         if (err)
1689                 return (err);
1690         if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1691                 pc_unlockfs(fsp);
1692                 return (0);
1693         }
1694 
1695         if (len == 0) {
1696                 /*
1697                  * Search the entire vp list for pages >= off
1698                  */
1699                 err = pvn_vplist_dirty(vp, off,
1700                     pcfs_putapage, flags, cr);
1701         } else {
1702                 eoff = off + len;
1703 
1704                 for (io_off = off; io_off < eoff &&
1705                     io_off < pcp->pc_size; io_off += io_len) {
1706                         /*
1707                          * If we are not invalidating, synchronously
1708                          * freeing or writing pages use the routine
1709                          * page_lookup_nowait() to prevent reclaiming
1710                          * them from the free list.
1711                          */
1712                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1713                                 pp = page_lookup(vp, io_off,
1714                                     (flags & (B_INVAL | B_FREE)) ?
1715                                     SE_EXCL : SE_SHARED);
1716                         } else {
1717                                 pp = page_lookup_nowait(vp, io_off,
1718                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1719                         }
1720 
1721                         if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1722                                 io_len = PAGESIZE;
1723                         else {
1724                                 err = pcfs_putapage(vp, pp, &io_off, &io_len,
1725                                     flags, cr);
1726                                 if (err != 0)
1727                                         break;
1728                                 /*
1729                                  * "io_off" and "io_len" are returned as
1730                                  * the range of pages we actually wrote.
1731                                  * This allows us to skip ahead more quickly
1732                                  * since several pages may've been dealt
1733                                  * with by this iteration of the loop.
1734                                  */
1735                         }
1736                 }
1737         }
1738         if (err == 0 && (flags & B_INVAL) &&
1739             off == 0 && len == 0 && vn_has_cached_data(vp)) {
1740                 /*
1741                  * If doing "invalidation", make sure that
1742                  * all pages on the vnode list are actually
1743                  * gone.
1744                  */
1745                 cmn_err(CE_PANIC,
1746                     "pcfs_putpage: B_INVAL, pages not gone");
1747         } else if (err) {
1748                 PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1749         }
1750         pc_unlockfs(fsp);
1751         return (err);
1752 }
1753 
1754 /*
1755  * Write out a single page, possibly klustering adjacent dirty pages.
1756  */
1757 /*ARGSUSED*/
1758 int
1759 pcfs_putapage(
1760         struct vnode *vp,
1761         page_t *pp,
1762         u_offset_t *offp,
1763         size_t *lenp,
1764         int flags,
1765         struct cred *cr)
1766 {
1767         struct pcnode *pcp;
1768         struct pcfs *fsp;
1769         struct vnode *devvp;
1770         size_t io_len;
1771         daddr_t bn;
1772         u_offset_t lbn, lbnoff, xferoffset;
1773         uint_t pgoff, xfersize;
1774         int err = 0;
1775         u_offset_t io_off;
1776 
1777         pcp = VTOPC(vp);
1778         fsp = VFSTOPCFS(vp->v_vfsp);
1779         devvp = fsp->pcfs_devvp;
1780 
1781         /*
1782          * If the modified time on the inode has not already been
1783          * set elsewhere (e.g. for write/setattr) and this is not
1784          * a call from msync (B_FORCE) we set the time now.
1785          * This gives us approximate modified times for mmap'ed files
1786          * which are modified via stores in the user address space.
1787          */
1788         if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1789                 pcp->pc_flags |= PC_MOD;
1790                 pc_mark_mod(fsp, pcp);
1791         }
1792         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1793             PAGESIZE, flags);
1794 
1795         if (fsp->pcfs_flags & PCFS_IRRECOV) {
1796                 goto out;
1797         }
1798 
1799         PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1800 
1801         lbn = pc_lblkno(fsp, io_off);
1802         lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1803         xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1804 
1805         for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1806             pgoff += xfersize,
1807             lbn += howmany(xfersize, fsp->pcfs_clsize),
1808             lbnoff += xfersize, xferoffset += xfersize) {
1809 
1810                 struct buf *bp;
1811                 int err1;
1812 
1813                 /*
1814                  * write as many contiguous blocks as possible from this page
1815                  */
1816                 xfersize = io_len - pgoff;
1817                 err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1818                 if (err1) {
1819                         err = err1;
1820                         goto out;
1821                 }
1822                 bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1823                 bp->b_edev = devvp->v_rdev;
1824                 bp->b_dev = cmpdev(devvp->v_rdev);
1825                 bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1826                 bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1827                 bp->b_file = vp;
1828                 bp->b_offset = (offset_t)(io_off + pgoff);
1829 
1830                 (void) bdev_strategy(bp);
1831 
1832                 lwp_stat_update(LWP_STAT_OUBLK, 1);
1833 
1834                 if (err == 0)
1835                         err = biowait(bp);
1836                 else
1837                         (void) biowait(bp);
1838                 pageio_done(bp);
1839         }
1840         pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1841         pp = NULL;
1842 
1843 out:
1844         if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1845                 pvn_write_done(pp, B_WRITE | flags);
1846         } else if (err != 0 && pp != NULL) {
1847                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1848         }
1849 
1850         if (offp)
1851                 *offp = io_off;
1852         if (lenp)
1853                 *lenp = io_len;
1854                 PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1855                     (void *)vp, (void *)pp, io_off, io_len);
1856         if (err) {
1857                 PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1858         }
1859         return (err);
1860 }
1861 
1862 /*ARGSUSED*/
1863 static int
1864 pcfs_map(
1865         struct vnode *vp,
1866         offset_t off,
1867         struct as *as,
1868         caddr_t *addrp,
1869         size_t len,
1870         uchar_t prot,
1871         uchar_t maxprot,
1872         uint_t flags,
1873         struct cred *cr,
1874         caller_context_t *ct)
1875 {
1876         struct segvn_crargs vn_a;
1877         int error;
1878 
1879         PC_DPRINTF0(6, "pcfs_map\n");
1880         if (vp->v_flag & VNOMAP)
1881                 return (ENOSYS);
1882 
1883         if (off > UINT32_MAX || off + len > UINT32_MAX)
1884                 return (ENXIO);
1885 
1886         as_rangelock(as);
1887         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1888         if (error != 0) {
1889                 as_rangeunlock(as);
1890                 return (error);
1891         }
1892 
1893         vn_a.vp = vp;
1894         vn_a.offset = off;
1895         vn_a.type = flags & MAP_TYPE;
1896         vn_a.prot = prot;
1897         vn_a.maxprot = maxprot;
1898         vn_a.flags = flags & ~MAP_TYPE;
1899         vn_a.cred = cr;
1900         vn_a.amp = NULL;
1901         vn_a.szc = 0;
1902         vn_a.lgrp_mem_policy_flags = 0;
1903 
1904         error = as_map(as, *addrp, len, segvn_create, &vn_a);
1905         as_rangeunlock(as);
1906         return (error);
1907 }
1908 
1909 /* ARGSUSED */
1910 static int
1911 pcfs_seek(
1912         struct vnode *vp,
1913         offset_t ooff,
1914         offset_t *noffp,
1915         caller_context_t *ct)
1916 {
1917         if (*noffp < 0)
1918                 return (EINVAL);
1919         else if (*noffp > MAXOFFSET_T)
1920                 return (EINVAL);
1921         else
1922                 return (0);
1923 }
1924 
1925 /* ARGSUSED */
1926 static int
1927 pcfs_addmap(
1928         struct vnode *vp,
1929         offset_t off,
1930         struct as *as,
1931         caddr_t addr,
1932         size_t len,
1933         uchar_t prot,
1934         uchar_t maxprot,
1935         uint_t flags,
1936         struct cred *cr,
1937         caller_context_t *ct)
1938 {
1939         if (vp->v_flag & VNOMAP)
1940                 return (ENOSYS);
1941         return (0);
1942 }
1943 
1944 /*ARGSUSED*/
1945 static int
1946 pcfs_delmap(
1947         struct vnode *vp,
1948         offset_t off,
1949         struct as *as,
1950         caddr_t addr,
1951         size_t len,
1952         uint_t prot,
1953         uint_t maxprot,
1954         uint_t flags,
1955         struct cred *cr,
1956         caller_context_t *ct)
1957 {
1958         if (vp->v_flag & VNOMAP)
1959                 return (ENOSYS);
1960         return (0);
1961 }
1962 
1963 /*
1964  * POSIX pathconf() support.
1965  */
1966 /* ARGSUSED */
1967 static int
1968 pcfs_pathconf(
1969         struct vnode *vp,
1970         int cmd,
1971         ulong_t *valp,
1972         struct cred *cr,
1973         caller_context_t *ct)
1974 {
1975         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1976 
1977         switch (cmd) {
1978         case _PC_LINK_MAX:
1979                 *valp = 1;
1980                 return (0);
1981 
1982         case _PC_CASE_BEHAVIOR:
1983                 return (EINVAL);
1984 
1985         case _PC_FILESIZEBITS:
1986                 /*
1987                  * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1988                  * FAT12 can only go up to the maximum filesystem capacity
1989                  * which is ~509MB.
1990                  */
1991                 *valp = IS_FAT12(fsp) ? 30 : 33;
1992                 return (0);
1993 
1994         case _PC_TIMESTAMP_RESOLUTION:
1995                 /*
1996                  * PCFS keeps track of modification times, it its own
1997                  * internal format, to a resolution of 2 seconds.
1998                  * Since 2000 million is representable in an int32_t
1999                  * without overflow (or becoming negative), we allow
2000                  * this value to be returned.
2001                  */
2002                 *valp = 2000000000L;
2003                 return (0);
2004 
2005         default:
2006                 return (fs_pathconf(vp, cmd, valp, cr, ct));
2007         }
2008 
2009 }
2010 
2011 /* ARGSUSED */
2012 static int
2013 pcfs_space(
2014         struct vnode *vp,
2015         int cmd,
2016         struct flock64 *bfp,
2017         int flag,
2018         offset_t offset,
2019         cred_t *cr,
2020         caller_context_t *ct)
2021 {
2022         struct vattr vattr;
2023         int error;
2024 
2025         if (cmd != F_FREESP)
2026                 return (EINVAL);
2027 
2028         if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2029                 if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2030                         return (EFBIG);
2031                 /*
2032                  * we only support the special case of l_len == 0,
2033                  * meaning free to end of file at this moment.
2034                  */
2035                 if (bfp->l_len != 0)
2036                         return (EINVAL);
2037                 vattr.va_mask = AT_SIZE;
2038                 vattr.va_size = bfp->l_start;
2039                 error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2040         }
2041         return (error);
2042 }
2043 
2044 /*
2045  * Break up 'len' chars from 'buf' into a long file name chunk.
2046  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2047  */
2048 void
2049 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2050 {
2051         int     i;
2052 
2053         ASSERT(buf != NULL);
2054 
2055         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2056                 if (len > 0) {
2057                         ep->pcdl_firstfilename[i] = *buf++;
2058                         ep->pcdl_firstfilename[i + 1] = *buf++;
2059                         len -= 2;
2060                 } else {
2061                         ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2062                         ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2063                 }
2064         }
2065 
2066         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2067                 if (len > 0) {
2068                         ep->pcdl_secondfilename[i] = *buf++;
2069                         ep->pcdl_secondfilename[i + 1] = *buf++;
2070                         len -= 2;
2071                 } else {
2072                         ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2073                         ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2074                 }
2075         }
2076         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2077                 if (len > 0) {
2078                         ep->pcdl_thirdfilename[i] = *buf++;
2079                         ep->pcdl_thirdfilename[i + 1] = *buf++;
2080                         len -= 2;
2081                 } else {
2082                         ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2083                         ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2084                 }
2085         }
2086 }
2087 
2088 /*
2089  * Extract the characters from the long filename chunk into 'buf'.
2090  * Return the number of characters extracted.
2091  */
2092 static int
2093 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2094 {
2095         char    *tmp = buf;
2096         int     i;
2097 
2098         /* Copy all the names, no filtering now */
2099 
2100         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2101                 *tmp = ep->pcdl_firstfilename[i];
2102                 *(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2103 
2104                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2105                         return (tmp - buf);
2106         }
2107         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2108                 *tmp = ep->pcdl_secondfilename[i];
2109                 *(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2110 
2111                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2112                         return (tmp - buf);
2113         }
2114         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2115                 *tmp = ep->pcdl_thirdfilename[i];
2116                 *(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2117 
2118                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2119                         return (tmp - buf);
2120         }
2121         return (tmp - buf);
2122 }
2123 
2124 
/*
 * Checksum the passed in short filename.
 * This is used to validate each component of the long name to make
 * sure the long name is valid (it hasn't been "detached" from the
 * short filename). This algorithm was found in FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 *
 * 'name' supplies 8 bytes and 'ext' supplies 3 bytes.  Starting from the
 * first byte, the running 8-bit sum is rotated right by one bit and the
 * next byte is added, for each of the remaining ten bytes.
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
        char    shortname[11];
        uchar_t sum;
        int     idx;

        bcopy(name, shortname, 8);
        bcopy(ext, shortname + 8, 3);

        sum = shortname[0];
        for (idx = 1; idx < 11; idx++)
                sum = ((sum << 7) | (sum >> 1)) + shortname[idx];

        return (sum);
}
2156 
2157 /*
2158  * Read a chunk of long filename entries into 'namep'.
2159  * Return with offset pointing to short entry (on success), or next
2160  * entry to read (if this wasn't a valid lfn really).
2161  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2162  * a long filename.
2163  *
2164  * Can also be called with a NULL namep, in which case it just returns
2165  * whether this was really a valid long filename and consumes it
2166  * (used by pc_dirempty()).
2167  */
2168 int
2169 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2170     struct pcdir **epp, offset_t *offset, struct buf **bp)
2171 {
2172         struct pcdir *ep = *epp;
2173         struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2174         struct vnode *dvp = PCTOV(pcp);
2175         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2176         char    *lfn;
2177         char    *lfn_base;
2178         int     boff;
2179         int     i, cs;
2180         char    *buf;
2181         uchar_t cksum;
2182         int     detached = 0;
2183         int     error = 0;
2184         int     foldcase;
2185         int     count = 0;
2186         size_t  u16l = 0, u8l = 0;
2187         char    *outbuf;
2188         size_t  ret, inlen, outlen;
2189 
2190         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2191         lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2192         lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2193         *lfn = '\0';
2194         *(lfn + 1) = '\0';
2195         cksum = lep->pcdl_checksum;
2196 
2197         buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2198         for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2199                 /* read next block if necessary */
2200                 boff = pc_blkoff(fsp, *offset);
2201                 if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2202                         if (*bp != NULL) {
2203                                 brelse(*bp);
2204                                 *bp = NULL;
2205                         }
2206                         error = pc_blkatoff(pcp, *offset, bp, &ep);
2207                         if (error) {
2208                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2209                                 kmem_free(buf, PCMAXNAM_UTF16);
2210                                 return (error);
2211                         }
2212                         lep = (struct pcdir_lfn *)ep;
2213                 }
2214                 /* can this happen? Bad fs? */
2215                 if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2216                         detached = 1;
2217                         break;
2218                 }
2219                 if (cksum != lep->pcdl_checksum)
2220                         detached = 1;
2221                 /* process current entry */
2222                 cs = get_long_fn_chunk(lep, buf);
2223                 count += cs;
2224                 for (; cs > 0; cs--) {
2225                         /* see if we underflow */
2226                         if (lfn >= lfn_base)
2227                                 *--lfn = buf[cs - 1];
2228                         else
2229                                 detached = 1;
2230                 }
2231                 lep++;
2232                 *offset += sizeof (struct pcdir);
2233         }
2234         kmem_free(buf, PCMAXNAM_UTF16);
2235         /* read next block if necessary */
2236         boff = pc_blkoff(fsp, *offset);
2237         ep = (struct pcdir *)lep;
2238         if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2239                 if (*bp != NULL) {
2240                         brelse(*bp);
2241                         *bp = NULL;
2242                 }
2243                 error = pc_blkatoff(pcp, *offset, bp, &ep);
2244                 if (error) {
2245                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2246                         return (error);
2247                 }
2248         }
2249         /* should be on the short one */
2250         if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2251             (ep->pcd_filename[0] == PCD_ERASED))) {
2252                 detached = 1;
2253         }
2254         if (detached ||
2255             (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2256             !pc_valid_long_fn(lfn, 0)) {
2257                 /*
2258                  * process current entry again. This may end up another lfn
2259                  * or a short name.
2260                  */
2261                 *epp = ep;
2262                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2263                 return (EINVAL);
2264         }
2265         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2266                 /*
2267                  * Don't display label because it may contain
2268                  * funny characters.
2269                  */
2270                 *offset += sizeof (struct pcdir);
2271                 ep++;
2272                 *epp = ep;
2273                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2274                 return (EINVAL);
2275         }
2276         if (namep) {
2277                 u16l = count / 2;
2278                 u8l = PCMAXNAMLEN;
2279                 error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2280                     (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2281                 /*
2282                  * uconv_u16tou8() will catch conversion errors including
2283                  * the case where there is not enough room to write the
2284                  * converted result and the u8l will never go over the given
2285                  * PCMAXNAMLEN.
2286                  */
2287                 if (error != 0) {
2288                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2289                         return (EINVAL);
2290                 }
2291                 namep[u8l] = '\0';
2292                 if (foldcase) {
2293                         inlen = strlen(namep);
2294                         outlen = PCMAXNAMLEN;
2295                         outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2296                         ret = u8_textprep_str(namep, &inlen, outbuf,
2297                             &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2298                             &error);
2299                         if (ret == -1) {
2300                                 kmem_free(outbuf, PCMAXNAMLEN + 1);
2301                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2302                                 return (EINVAL);
2303                         }
2304                         outbuf[PCMAXNAMLEN - outlen] = '\0';
2305                         (void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2306                         kmem_free(outbuf, PCMAXNAMLEN + 1);
2307                 }
2308         }
2309         kmem_free(lfn_base, PCMAXNAM_UTF16);
2310         *epp = ep;
2311         return (0);
2312 }
2313 /*
2314  * Read a long filename into the pc_dirent structure and copy it out.
2315  */
2316 int
2317 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2318     struct pcdir **epp, offset_t *offset, struct buf **bp)
2319 {
2320         struct pcdir *ep;
2321         struct pcnode *pcp = VTOPC(dvp);
2322         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2323         offset_t uiooffset = uiop->uio_loffset;
2324         int     error = 0;
2325         offset_t oldoffset;
2326 
2327         oldoffset = *offset;
2328         error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2329         if (error) {
2330                 if (error == EINVAL) {
2331                         uiop->uio_loffset += *offset - oldoffset;
2332                         return (0);
2333                 } else
2334                         return (error);
2335         }
2336 
2337         ep = *epp;
2338         uiop->uio_loffset += *offset - oldoffset;
2339         ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2340         if (ld->d_reclen > uiop->uio_resid) {
2341                 uiop->uio_loffset = uiooffset;
2342                 return (ENOSPC);
2343         }
2344         ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2345         ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2346             pc_blkoff(fsp, *offset), ep->pcd_attr,
2347             pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2348         (void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2349         uiop->uio_loffset = ld->d_off;
2350         *offset += sizeof (struct pcdir);
2351         ep++;
2352         *epp = ep;
2353         return (0);
2354 }
2355 
2356 /*
2357  * Read a short filename into the pc_dirent structure and copy it out.
2358  */
2359 int
2360 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2361     struct pcdir **epp, offset_t *offset, struct buf **bp)
2362 {
2363         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2364         int     boff = pc_blkoff(fsp, *offset);
2365         struct pcdir *ep = *epp;
2366         offset_t        oldoffset = uiop->uio_loffset;
2367         int     error;
2368         int     foldcase;
2369 
2370         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2371                 uiop->uio_loffset += sizeof (struct pcdir);
2372                 *offset += sizeof (struct pcdir);
2373                 ep++;
2374                 *epp = ep;
2375                 return (0);
2376         }
2377         ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2378             boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2379             pc_direntpersec(fsp));
2380         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2381         error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2382             &ep->pcd_ext[0], foldcase);
2383         if (error == 0) {
2384                 ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2385                 if (ld->d_reclen > uiop->uio_resid) {
2386                         uiop->uio_loffset = oldoffset;
2387                         return (ENOSPC);
2388                 }
2389                 ld->d_off = (off64_t)(uiop->uio_loffset +
2390                     sizeof (struct pcdir));
2391                 (void) uiomove((caddr_t)ld,
2392                     ld->d_reclen, UIO_READ, uiop);
2393                 uiop->uio_loffset = ld->d_off;
2394         } else {
2395                 uiop->uio_loffset += sizeof (struct pcdir);
2396         }
2397         *offset += sizeof (struct pcdir);
2398         ep++;
2399         *epp = ep;
2400         return (0);
2401 }
2402 
2403 /* ARGSUSED */
2404 static int
2405 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2406 {
2407         struct pc_fid *pcfid;
2408         struct pcnode *pcp;
2409         struct pcfs     *fsp;
2410         int     error;
2411 
2412         fsp = VFSTOPCFS(vp->v_vfsp);
2413         if (fsp == NULL)
2414                 return (EIO);
2415         error = pc_lockfs(fsp, 0, 0);
2416         if (error)
2417                 return (error);
2418         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2419                 pc_unlockfs(fsp);
2420                 return (EIO);
2421         }
2422         if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2423                 fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2424                 pc_unlockfs(fsp);
2425                 return (ENOSPC);
2426         }
2427 
2428         pcfid = (struct pc_fid *)fidp;
2429         bzero(pcfid, sizeof (struct pc_fid));
2430         pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2431         if (vp->v_flag & VROOT) {
2432                 pcfid->pcfid_block = 0;
2433                 pcfid->pcfid_offset = 0;
2434                 pcfid->pcfid_ctime = 0;
2435         } else {
2436                 pcfid->pcfid_block = pcp->pc_eblkno;
2437                 pcfid->pcfid_offset = pcp->pc_eoffset;
2438                 pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2439         }
2440         pc_unlockfs(fsp);
2441         return (0);
2442 }