1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  26  *      All rights reserved.
  27  */
  28 /*
  29  * Copyright (c) 2017 by Delphix. All rights reserved.
  30  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  31  */
  32 
  33 /*
  34  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
  35  * but then heavily modified. It's no longer an array of hash lists,
  36  * but an AVL tree per mount point.  More on this below.
  37  */
  38 
  39 #include <sys/param.h>
  40 #include <sys/systm.h>
  41 #include <sys/time.h>
  42 #include <sys/vnode.h>
  43 #include <sys/atomic.h>
  44 #include <sys/bitmap.h>
  45 #include <sys/buf.h>
  46 #include <sys/dnlc.h>
  47 #include <sys/kmem.h>
  48 #include <sys/sunddi.h>
  49 #include <sys/sysmacros.h>
  50 #include <sys/fcntl.h>
  51 
  52 #include <netsmb/smb_osdep.h>
  53 
  54 #include <netsmb/smb.h>
  55 #include <netsmb/smb_conn.h>
  56 #include <netsmb/smb_subr.h>
  57 #include <netsmb/smb_rq.h>
  58 
  59 #include <smbfs/smbfs.h>
  60 #include <smbfs/smbfs_node.h>
  61 #include <smbfs/smbfs_subr.h>
  62 
  63 /*
  64  * The AVL trees (now per-mount) allow finding an smbfs node by its
  65  * full remote path name.  It also allows easy traversal of all nodes
  66  * below (path wise) any given node.  A reader/writer lock for each
  67  * (per mount) AVL tree is used to control access and to synchronize
  68  * lookups, additions, and deletions from that AVL tree.
  69  *
  70  * Previously, this code use a global array of hash chains, each with
  71  * its own rwlock.  A few struct members, functions, and comments may
  72  * still refer to a "hash", and those should all now be considered to
  73  * refer to the per-mount AVL tree that replaced the old hash chains.
  74  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
  75  *
  76  * The smbnode freelist is organized as a doubly linked list with
  77  * a head pointer.  Additions and deletions are synchronized via
  78  * a single mutex.
  79  *
  80  * In order to add an smbnode to the free list, it must be linked into
  81  * the mount's AVL tree and the exclusive lock for the AVL must be held.
  82  * If an smbnode is not linked into the AVL tree, then it is destroyed
  83  * because it represents no valuable information that can be reused
  84  * about the file.  The exclusive lock for the AVL tree must be held
  85  * in order to prevent a lookup in the AVL tree from finding the
  86  * smbnode and using it and assuming that the smbnode is not on the
  87  * freelist.  The lookup in the AVL tree will have the AVL tree lock
  88  * held, either exclusive or shared.
  89  *
  90  * The vnode reference count for each smbnode is not allowed to drop
  91  * below 1.  This prevents external entities, such as the VM
  92  * subsystem, from acquiring references to vnodes already on the
  93  * freelist and then trying to place them back on the freelist
  94  * when their reference is released.  This means that the when an
  95  * smbnode is looked up in the AVL tree, then either the smbnode
  96  * is removed from the freelist and that reference is tranfered to
  97  * the new reference or the vnode reference count must be incremented
  98  * accordingly.  The mutex for the freelist must be held in order to
  99  * accurately test to see if the smbnode is on the freelist or not.
 100  * The AVL tree lock might be held shared and it is possible that
 101  * two different threads may race to remove the smbnode from the
 102  * freelist.  This race can be resolved by holding the mutex for the
 103  * freelist.  Please note that the mutex for the freelist does not
 104  * need to held if the smbnode is not on the freelist.  It can not be
 105  * placed on the freelist due to the requirement that the thread
 106  * putting the smbnode on the freelist must hold the exclusive lock
 107  * for the AVL tree and the thread doing the lookup in the AVL tree
 108  * is holding either a shared or exclusive lock for the AVL tree.
 109  *
 110  * The lock ordering is:
 111  *
 112  *      AVL tree lock -> vnode lock
 113  *      AVL tree lock -> freelist lock
 114  */
 115 
/* Protects smbfreelist and the r_freef/r_freeb links of nodes on it. */
static kmutex_t smbfreelist_lock;
/* Head of the doubly-linked, circular list of free smbnodes. */
static smbnode_t *smbfreelist = NULL;
/* Count of smbnodes currently allocated (see make_smbnode). */
static ulong_t	smbnodenew = 0;
/* Tunable: soft limit on allocated smbnodes before recycling/destroying. */
long	nsmbnode = 0;

/* kmem cache from which smbnode_t allocations are made. */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr used to reset a node's cached ACL (see sn_inactive). */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * Sentinel attributes: callers that need a node created but have
 * no real attributes pass &smbfs_fattr0.  See smbfs_node_findcreate().
 */
struct smbfattr smbfs_fattr0;
 136 
 137 /*
 138  * Local functions.
 139  * SN for Smb Node
 140  */
 141 static void sn_rmfree(smbnode_t *);
 142 static void sn_inactive(smbnode_t *);
 143 static void sn_addhash_locked(smbnode_t *, avl_index_t);
 144 static void sn_rmhash_locked(smbnode_t *);
 145 static void sn_destroy_node(smbnode_t *);
 146 void smbfs_kmem_reclaim(void *cdrarg);
 147 
 148 static smbnode_t *
 149 sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);
 150 
 151 static smbnode_t *
 152 make_smbnode(smbmntinfo_t *, const char *, int, int *);
 153 
 154 /*
 155  * Free the resources associated with an smbnode.
 156  * Note: This is different from smbfs_inactive
 157  *
 158  * From NFS: nfs_subr.c:rinactive
 159  */
 160 static void
 161 sn_inactive(smbnode_t *np)
 162 {
 163         vsecattr_t      ovsa;
 164         cred_t          *oldcr;
 165         char            *orpath;
 166         int             orplen;
 167         vnode_t         *vp;
 168 
 169         /*
 170          * Here NFS has:
 171          * Flush and invalidate all pages (done by caller)
 172          * Free any held credentials and caches...
 173          * etc.  (See NFS code)
 174          */
 175         mutex_enter(&np->r_statelock);
 176 
 177         ovsa = np->r_secattr;
 178         np->r_secattr = smbfs_vsa0;
 179         np->r_sectime = 0;
 180 
 181         oldcr = np->r_cred;
 182         np->r_cred = NULL;
 183 
 184         orpath = np->n_rpath;
 185         orplen = np->n_rplen;
 186         np->n_rpath = NULL;
 187         np->n_rplen = 0;
 188 
 189         mutex_exit(&np->r_statelock);
 190 
 191         vp = SMBTOV(np);
 192         if (vn_has_cached_data(vp)) {
 193                 ASSERT3P(vp,==,NULL);
 194         }
 195 
 196         if (ovsa.vsa_aclentp != NULL)
 197                 kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
 198 
 199         if (oldcr != NULL)
 200                 crfree(oldcr);
 201 
 202         if (orpath != NULL)
 203                 kmem_free(orpath, orplen + 1);
 204 }
 205 
 206 /*
 207  * Find and optionally create an smbnode for the passed
 208  * mountinfo, directory, separator, and name.  If the
 209  * desired smbnode already exists, return a reference.
 210  * If the file attributes pointer is non-null, the node
 211  * is created if necessary and linked into the AVL tree.
 212  *
 213  * Callers that need a node created but don't have the
 214  * real attributes pass smbfs_fattr0 to force creation.
 215  *
 216  * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 217  *
 218  * Based on NFS: nfs_subr.c:makenfsnode
 219  */
 220 smbnode_t *
 221 smbfs_node_findcreate(
 222         smbmntinfo_t *mi,
 223         const char *dirnm,
 224         int dirlen,
 225         const char *name,
 226         int nmlen,
 227         char sep,
 228         struct smbfattr *fap)
 229 {
 230         char tmpbuf[256];
 231         size_t rpalloc;
 232         char *p, *rpath;
 233         int rplen;
 234         smbnode_t *np;
 235         vnode_t *vp;
 236         int newnode;
 237 
 238         /*
 239          * Build the search string, either in tmpbuf or
 240          * in allocated memory if larger than tmpbuf.
 241          */
 242         rplen = dirlen;
 243         if (sep != '\0')
 244                 rplen++;
 245         rplen += nmlen;
 246         if (rplen < sizeof (tmpbuf)) {
 247                 /* use tmpbuf */
 248                 rpalloc = 0;
 249                 rpath = tmpbuf;
 250         } else {
 251                 rpalloc = rplen + 1;
 252                 rpath = kmem_alloc(rpalloc, KM_SLEEP);
 253         }
 254         p = rpath;
 255         bcopy(dirnm, p, dirlen);
 256         p += dirlen;
 257         if (sep != '\0')
 258                 *p++ = sep;
 259         if (name != NULL) {
 260                 bcopy(name, p, nmlen);
 261                 p += nmlen;
 262         }
 263         ASSERT(p == rpath + rplen);
 264 
 265         /*
 266          * Find or create a node with this path.
 267          */
 268         rw_enter(&mi->smi_hash_lk, RW_READER);
 269         if (fap == NULL)
 270                 np = sn_hashfind(mi, rpath, rplen, NULL);
 271         else
 272                 np = make_smbnode(mi, rpath, rplen, &newnode);
 273         rw_exit(&mi->smi_hash_lk);
 274 
 275         if (rpalloc)
 276                 kmem_free(rpath, rpalloc);
 277 
 278         if (fap == NULL) {
 279                 /*
 280                  * Caller is "just looking" (no create)
 281                  * so np may or may not be NULL here.
 282                  * Either way, we're done.
 283                  */
 284                 return (np);
 285         }
 286 
 287         /*
 288          * We should have a node, possibly created.
 289          * Do we have (real) attributes to apply?
 290          */
 291         ASSERT(np != NULL);
 292         if (fap == &smbfs_fattr0)
 293                 return (np);
 294 
 295         /*
 296          * Apply the given attributes to this node,
 297          * dealing with any cache impact, etc.
 298          */
 299         vp = SMBTOV(np);
 300         smbfs_attrcache_fa(vp, fap);
 301 
 302         /*
 303          * Note NFS sets vp->v_type here, assuming it
 304          * can never change for the life of a node.
 305          * We allow v_type to change, and set it in
 306          * smbfs_attrcache().  Also: mode, uid, gid
 307          */
 308         return (np);
 309 }
 310 
 311 /*
 312  * Here NFS has: nfs_subr.c:rtablehash
 313  * We use smbfs_hash().
 314  */
 315 
 316 /*
 317  * Find or create an smbnode.
 318  * From NFS: nfs_subr.c:make_rnode
 319  */
 320 static smbnode_t *
 321 make_smbnode(
 322         smbmntinfo_t *mi,
 323         const char *rpath,
 324         int rplen,
 325         int *newnode)
 326 {
 327         smbnode_t *np;
 328         smbnode_t *tnp;
 329         vnode_t *vp;
 330         vfs_t *vfsp;
 331         avl_index_t where;
 332         char *new_rpath = NULL;
 333 
 334         ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
 335         vfsp = mi->smi_vfsp;
 336 
 337 start:
 338         np = sn_hashfind(mi, rpath, rplen, NULL);
 339         if (np != NULL) {
 340                 *newnode = 0;
 341                 return (np);
 342         }
 343 
 344         /* Note: will retake this lock below. */
 345         rw_exit(&mi->smi_hash_lk);
 346 
 347         /*
 348          * see if we can find something on the freelist
 349          */
 350         mutex_enter(&smbfreelist_lock);
 351         if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
 352                 np = smbfreelist;
 353                 sn_rmfree(np);
 354                 mutex_exit(&smbfreelist_lock);
 355 
 356                 vp = SMBTOV(np);
 357 
 358                 if (np->r_flags & RHASHED) {
 359                         smbmntinfo_t *tmp_mi = np->n_mount;
 360                         ASSERT(tmp_mi != NULL);
 361                         rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
 362                         mutex_enter(&vp->v_lock);
 363                         if (vp->v_count > 1) {
 364                                 VN_RELE_LOCKED(vp);
 365                                 mutex_exit(&vp->v_lock);
 366                                 rw_exit(&tmp_mi->smi_hash_lk);
 367                                 /* start over */
 368                                 rw_enter(&mi->smi_hash_lk, RW_READER);
 369                                 goto start;
 370                         }
 371                         mutex_exit(&vp->v_lock);
 372                         sn_rmhash_locked(np);
 373                         rw_exit(&tmp_mi->smi_hash_lk);
 374                 }
 375 
 376                 sn_inactive(np);
 377 
 378                 mutex_enter(&vp->v_lock);
 379                 if (vp->v_count > 1) {
 380                         VN_RELE_LOCKED(vp);
 381                         mutex_exit(&vp->v_lock);
 382                         rw_enter(&mi->smi_hash_lk, RW_READER);
 383                         goto start;
 384                 }
 385                 mutex_exit(&vp->v_lock);
 386                 vn_invalid(vp);
 387                 /*
 388                  * destroy old locks before bzero'ing and
 389                  * recreating the locks below.
 390                  */
 391                 smbfs_rw_destroy(&np->r_rwlock);
 392                 smbfs_rw_destroy(&np->r_lkserlock);
 393                 mutex_destroy(&np->r_statelock);
 394                 cv_destroy(&np->r_cv);
 395                 /*
 396                  * Make sure that if smbnode is recycled then
 397                  * VFS count is decremented properly before
 398                  * reuse.
 399                  */
 400                 VFS_RELE(vp->v_vfsp);
 401                 vn_reinit(vp);
 402         } else {
 403                 /*
 404                  * allocate and initialize a new smbnode
 405                  */
 406                 vnode_t *new_vp;
 407 
 408                 mutex_exit(&smbfreelist_lock);
 409 
 410                 np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
 411                 new_vp = vn_alloc(KM_SLEEP);
 412 
 413                 atomic_inc_ulong((ulong_t *)&smbnodenew);
 414                 vp = new_vp;
 415         }
 416 
 417         /*
 418          * Allocate and copy the rpath we'll need below.
 419          */
 420         new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
 421         bcopy(rpath, new_rpath, rplen);
 422         new_rpath[rplen] = '\0';
 423 
 424         /* Initialize smbnode_t */
 425         bzero(np, sizeof (*np));
 426 
 427         smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
 428         smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
 429         mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
 430         cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
 431         /* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */
 432 
 433         np->r_vnode = vp;
 434         np->n_mount = mi;
 435 
 436         np->n_fid = NULL;
 437         np->n_uid = mi->smi_uid;
 438         np->n_gid = mi->smi_gid;
 439         /* Leave attributes "stale." */
 440 
 441         /*
 442          * Here NFS has avl_create(&np->r_dir, ...)
 443          * for the readdir cache (not used here).
 444          */
 445 
 446         /* Now fill in the vnode. */
 447         vn_setops(vp, smbfs_vnodeops);
 448         vp->v_data = (caddr_t)np;
 449         VFS_HOLD(vfsp);
 450         vp->v_vfsp = vfsp;
 451         vp->v_type = VNON;
 452 
 453         /*
 454          * We entered with mi->smi_hash_lk held (reader).
 455          * Retake it now, (as the writer).
 456          * Will return with it held.
 457          */
 458         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 459 
 460         /*
 461          * There is a race condition where someone else
 462          * may alloc the smbnode while no locks are held,
 463          * so check again and recover if found.
 464          */
 465         tnp = sn_hashfind(mi, rpath, rplen, &where);
 466         if (tnp != NULL) {
 467                 /*
 468                  * Lost the race.  Put the node we were building
 469                  * on the free list and return the one we found.
 470                  */
 471                 rw_exit(&mi->smi_hash_lk);
 472                 kmem_free(new_rpath, rplen + 1);
 473                 smbfs_addfree(np);
 474                 rw_enter(&mi->smi_hash_lk, RW_READER);
 475                 *newnode = 0;
 476                 return (tnp);
 477         }
 478 
 479         /*
 480          * Hash search identifies nodes by the remote path
 481          * (n_rpath) so fill that in now, before linking
 482          * this node into the node cache (AVL tree).
 483          */
 484         np->n_rpath = new_rpath;
 485         np->n_rplen = rplen;
 486         np->n_ino = smbfs_gethash(new_rpath, rplen);
 487 
 488         sn_addhash_locked(np, where);
 489         *newnode = 1;
 490         return (np);
 491 }
 492 
 493 /*
 494  * smbfs_addfree
 495  * Put an smbnode on the free list, or destroy it immediately
 496  * if it offers no value were it to be reclaimed later.  Also
 497  * destroy immediately when we have too many smbnodes, etc.
 498  *
 499  * Normally called by smbfs_inactive, but also
 500  * called in here during cleanup operations.
 501  *
 502  * From NFS: nfs_subr.c:rp_addfree
 503  */
 504 void
 505 smbfs_addfree(smbnode_t *np)
 506 {
 507         vnode_t *vp;
 508         struct vfs *vfsp;
 509         smbmntinfo_t *mi;
 510 
 511         ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
 512 
 513         vp = SMBTOV(np);
 514         ASSERT(vp->v_count >= 1);
 515 
 516         vfsp = vp->v_vfsp;
 517         mi = VFTOSMI(vfsp);
 518 
 519         /*
 520          * If there are no more references to this smbnode and:
 521          * we have too many smbnodes allocated, or if the node
 522          * is no longer accessible via the AVL tree (!RHASHED),
 523          * or an i/o error occurred while writing to the file,
 524          * or it's part of an unmounted FS, then try to destroy
 525          * it instead of putting it on the smbnode freelist.
 526          */
 527         if (np->r_count == 0 && (
 528             (np->r_flags & RHASHED) == 0 ||
 529             (np->r_error != 0) ||
 530             (vfsp->vfs_flag & VFS_UNMOUNTED) ||
 531             (smbnodenew > nsmbnode))) {
 532 
 533                 /* Try to destroy this node. */
 534 
 535                 if (np->r_flags & RHASHED) {
 536                         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 537                         mutex_enter(&vp->v_lock);
 538                         if (vp->v_count > 1) {
 539                                 VN_RELE_LOCKED(vp);
 540                                 mutex_exit(&vp->v_lock);
 541                                 rw_exit(&mi->smi_hash_lk);
 542                                 return;
 543                                 /*
 544                                  * Will get another call later,
 545                                  * via smbfs_inactive.
 546                                  */
 547                         }
 548                         mutex_exit(&vp->v_lock);
 549                         sn_rmhash_locked(np);
 550                         rw_exit(&mi->smi_hash_lk);
 551                 }
 552 
 553                 sn_inactive(np);
 554 
 555                 /*
 556                  * Recheck the vnode reference count.  We need to
 557                  * make sure that another reference has not been
 558                  * acquired while we were not holding v_lock.  The
 559                  * smbnode is not in the smbnode "hash" AVL tree, so
 560                  * the only way for a reference to have been acquired
 561                  * is for a VOP_PUTPAGE because the smbnode was marked
 562                  * with RDIRTY or for a modified page.  This vnode
 563                  * reference may have been acquired before our call
 564                  * to sn_inactive.  The i/o may have been completed,
 565                  * thus allowing sn_inactive to complete, but the
 566                  * reference to the vnode may not have been released
 567                  * yet.  In any case, the smbnode can not be destroyed
 568                  * until the other references to this vnode have been
 569                  * released.  The other references will take care of
 570                  * either destroying the smbnode or placing it on the
 571                  * smbnode freelist.  If there are no other references,
 572                  * then the smbnode may be safely destroyed.
 573                  */
 574                 mutex_enter(&vp->v_lock);
 575                 if (vp->v_count > 1) {
 576                         VN_RELE_LOCKED(vp);
 577                         mutex_exit(&vp->v_lock);
 578                         return;
 579                 }
 580                 mutex_exit(&vp->v_lock);
 581 
 582                 sn_destroy_node(np);
 583                 return;
 584         }
 585 
 586         /*
 587          * Lock the AVL tree and then recheck the reference count
 588          * to ensure that no other threads have acquired a reference
 589          * to indicate that the smbnode should not be placed on the
 590          * freelist.  If another reference has been acquired, then
 591          * just release this one and let the other thread complete
 592          * the processing of adding this smbnode to the freelist.
 593          */
 594         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 595 
 596         mutex_enter(&vp->v_lock);
 597         if (vp->v_count > 1) {
 598                 VN_RELE_LOCKED(vp);
 599                 mutex_exit(&vp->v_lock);
 600                 rw_exit(&mi->smi_hash_lk);
 601                 return;
 602         }
 603         mutex_exit(&vp->v_lock);
 604 
 605         /*
 606          * Put this node on the free list.
 607          */
 608         mutex_enter(&smbfreelist_lock);
 609         if (smbfreelist == NULL) {
 610                 np->r_freef = np;
 611                 np->r_freeb = np;
 612                 smbfreelist = np;
 613         } else {
 614                 np->r_freef = smbfreelist;
 615                 np->r_freeb = smbfreelist->r_freeb;
 616                 smbfreelist->r_freeb->r_freef = np;
 617                 smbfreelist->r_freeb = np;
 618         }
 619         mutex_exit(&smbfreelist_lock);
 620 
 621         rw_exit(&mi->smi_hash_lk);
 622 }
 623 
 624 /*
 625  * Remove an smbnode from the free list.
 626  *
 627  * The caller must be holding smbfreelist_lock and the smbnode
 628  * must be on the freelist.
 629  *
 630  * From NFS: nfs_subr.c:rp_rmfree
 631  */
 632 static void
 633 sn_rmfree(smbnode_t *np)
 634 {
 635 
 636         ASSERT(MUTEX_HELD(&smbfreelist_lock));
 637         ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
 638 
 639         if (np == smbfreelist) {
 640                 smbfreelist = np->r_freef;
 641                 if (np == smbfreelist)
 642                         smbfreelist = NULL;
 643         }
 644 
 645         np->r_freeb->r_freef = np->r_freef;
 646         np->r_freef->r_freeb = np->r_freeb;
 647 
 648         np->r_freef = np->r_freeb = NULL;
 649 }
 650 
 651 /*
 652  * Put an smbnode in the "hash" AVL tree.
 653  *
 654  * The caller must be hold the rwlock as writer.
 655  *
 656  * From NFS: nfs_subr.c:rp_addhash
 657  */
 658 static void
 659 sn_addhash_locked(smbnode_t *np, avl_index_t where)
 660 {
 661         smbmntinfo_t *mi = np->n_mount;
 662 
 663         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 664 
 665         mutex_enter(&np->r_statelock);
 666         if ((np->r_flags & RHASHED) == 0) {
 667                 avl_insert(&mi->smi_hash_avl, np, where);
 668                 np->r_flags |= RHASHED;
 669         }
 670         mutex_exit(&np->r_statelock);
 671 }
 672 
 673 /*
 674  * Remove an smbnode from the "hash" AVL tree.
 675  *
 676  * The caller must hold the rwlock as writer.
 677  *
 678  * From NFS: nfs_subr.c:rp_rmhash_locked
 679  */
 680 static void
 681 sn_rmhash_locked(smbnode_t *np)
 682 {
 683         smbmntinfo_t *mi = np->n_mount;
 684 
 685         ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
 686 
 687         mutex_enter(&np->r_statelock);
 688         if ((np->r_flags & RHASHED) != 0) {
 689                 np->r_flags &= ~RHASHED;
 690                 avl_remove(&mi->smi_hash_avl, np);
 691         }
 692         mutex_exit(&np->r_statelock);
 693 }
 694 
 695 /*
 696  * Remove an smbnode from the "hash" AVL tree.
 697  *
 698  * The caller must not be holding the rwlock.
 699  */
 700 void
 701 smbfs_rmhash(smbnode_t *np)
 702 {
 703         smbmntinfo_t *mi = np->n_mount;
 704 
 705         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 706         sn_rmhash_locked(np);
 707         rw_exit(&mi->smi_hash_lk);
 708 }
 709 
 710 /*
 711  * Lookup an smbnode by remote pathname
 712  *
 713  * The caller must be holding the AVL rwlock, either shared or exclusive.
 714  *
 715  * From NFS: nfs_subr.c:rfind
 716  */
 717 static smbnode_t *
 718 sn_hashfind(
 719         smbmntinfo_t *mi,
 720         const char *rpath,
 721         int rplen,
 722         avl_index_t *pwhere) /* optional */
 723 {
 724         smbfs_node_hdr_t nhdr;
 725         smbnode_t *np;
 726         vnode_t *vp;
 727 
 728         ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
 729 
 730         bzero(&nhdr, sizeof (nhdr));
 731         nhdr.hdr_n_rpath = (char *)rpath;
 732         nhdr.hdr_n_rplen = rplen;
 733 
 734         /* See smbfs_node_cmp below. */
 735         np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
 736 
 737         if (np == NULL)
 738                 return (NULL);
 739 
 740         /*
 741          * Found it in the "hash" AVL tree.
 742          * Remove from free list, if necessary.
 743          */
 744         vp = SMBTOV(np);
 745         if (np->r_freef != NULL) {
 746                 mutex_enter(&smbfreelist_lock);
 747                 /*
 748                  * If the smbnode is on the freelist,
 749                  * then remove it and use that reference
 750                  * as the new reference.  Otherwise,
 751                  * need to increment the reference count.
 752                  */
 753                 if (np->r_freef != NULL) {
 754                         sn_rmfree(np);
 755                         mutex_exit(&smbfreelist_lock);
 756                 } else {
 757                         mutex_exit(&smbfreelist_lock);
 758                         VN_HOLD(vp);
 759                 }
 760         } else
 761                 VN_HOLD(vp);
 762 
 763         return (np);
 764 }
 765 
 766 static int
 767 smbfs_node_cmp(const void *va, const void *vb)
 768 {
 769         const smbfs_node_hdr_t *a = va;
 770         const smbfs_node_hdr_t *b = vb;
 771         int clen, diff;
 772 
 773         /*
 774          * Same semantics as strcmp, but does not
 775          * assume the strings are null terminated.
 776          */
 777         clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
 778             a->hdr_n_rplen : b->hdr_n_rplen;
 779         diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
 780         if (diff < 0)
 781                 return (-1);
 782         if (diff > 0)
 783                 return (1);
 784         /* they match through clen */
 785         if (b->hdr_n_rplen > clen)
 786                 return (-1);
 787         if (a->hdr_n_rplen > clen)
 788                 return (1);
 789         return (0);
 790 }
 791 
 792 /*
 793  * Setup the "hash" AVL tree used for our node cache.
 794  * See: smbfs_mount, smbfs_destroy_table.
 795  */
 796 void
 797 smbfs_init_hash_avl(avl_tree_t *avl)
 798 {
 799         avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
 800             offsetof(smbnode_t, r_avl_node));
 801 }
 802 
 803 /*
 804  * Invalidate the cached attributes for all nodes "under" the
 805  * passed-in node.  Note: the passed-in node is NOT affected by
 806  * this call.  This is used both for files under some directory
 807  * after the directory is deleted or renamed, and for extended
 808  * attribute files (named streams) under a plain file after that
 809  * file is renamed or deleted.
 810  *
 811  * Do this by walking the AVL tree starting at the passed in node,
 812  * and continuing while the visited nodes have a path prefix matching
 813  * the entire path of the passed-in node, and a separator just after
 814  * that matching path prefix.  Watch out for cases where the AVL tree
 815  * order may not exactly match the order of an FS walk, i.e.
 816  * consider this sequence:
 817  *      "foo"           (directory)
 818  *      "foo bar"       (name containing a space)
 819  *      "foo/bar"
 820  * The walk needs to skip "foo bar" and keep going until it finds
 821  * something that doesn't match the "foo" name prefix.
 822  */
 823 void
 824 smbfs_attrcache_prune(smbnode_t *top_np)
 825 {
 826         smbmntinfo_t *mi;
 827         smbnode_t *np;
 828         char *rpath;
 829         int rplen;
 830 
 831         mi = top_np->n_mount;
 832         rw_enter(&mi->smi_hash_lk, RW_READER);
 833 
 834         np = top_np;
 835         rpath = top_np->n_rpath;
 836         rplen = top_np->n_rplen;
 837         for (;;) {
 838                 np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
 839                 if (np == NULL)
 840                         break;
 841                 if (np->n_rplen < rplen)
 842                         break;
 843                 if (0 != strncmp(np->n_rpath, rpath, rplen))
 844                         break;
 845                 if (np->n_rplen > rplen && (
 846                     np->n_rpath[rplen] == ':' ||
 847                     np->n_rpath[rplen] == '\\'))
 848                         smbfs_attrcache_remove(np);
 849         }
 850 
 851         rw_exit(&mi->smi_hash_lk);
 852 }
 853 
/*
 * When non-zero, smbfs_check_table() keeps scanning and reports
 * every busy node instead of stopping at the first one found.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
 859 
 860 
 861 /*
 862  * Return 1 if there is a active vnode belonging to this vfs in the
 863  * smbnode cache.
 864  *
 865  * Several of these checks are done without holding the usual
 866  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
 867  * etc. will redo the necessary checks before actually destroying
 868  * any smbnodes.
 869  *
 870  * From NFS: nfs_subr.c:check_rtable
 871  *
 872  * Debugging changes here relative to NFS.
 873  * Relatively harmless, so left 'em in.
 874  */
 875 int
 876 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
 877 {
 878         smbmntinfo_t *mi;
 879         smbnode_t *np;
 880         vnode_t *vp;
 881         int busycnt = 0;
 882 
 883         mi = VFTOSMI(vfsp);
 884         rw_enter(&mi->smi_hash_lk, RW_READER);
 885         for (np = avl_first(&mi->smi_hash_avl); np != NULL;
 886             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
 887 
 888                 if (np == rtnp)
 889                         continue; /* skip the root */
 890                 vp = SMBTOV(np);
 891 
 892                 /* Now the 'busy' checks: */
 893                 /* Not on the free list? */
 894                 if (np->r_freef == NULL) {
 895                         SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
 896                             (void *)np, np->n_rpath);
 897                         busycnt++;
 898                 }
 899 
 900                 /* Has dirty pages? */
 901                 if (vn_has_cached_data(vp) &&
 902                     (np->r_flags & RDIRTY)) {
 903                         SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
 904                             (void *)np, np->n_rpath);
 905                         busycnt++;
 906                 }
 907 
 908                 /* Other refs? (not reflected in v_count) */
 909                 if (np->r_count > 0) {
 910                         SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
 911                             (void *)np, np->n_rpath);
 912                         busycnt++;
 913                 }
 914 
 915                 if (busycnt && !smbfs_check_table_debug)
 916                         break;
 917 
 918         }
 919         rw_exit(&mi->smi_hash_lk);
 920 
 921         return (busycnt);
 922 }
 923 
 924 /*
 925  * Destroy inactive vnodes from the AVL tree which belong to this
 926  * vfs.  It is essential that we destroy all inactive vnodes during a
 927  * forced unmount as well as during a normal unmount.
 928  *
 929  * Based on NFS: nfs_subr.c:destroy_rtable
 930  *
 931  * In here, we're normally destrying all or most of the AVL tree,
 932  * so the natural choice is to use avl_destroy_nodes.  However,
 933  * there may be a few busy nodes that should remain in the AVL
 934  * tree when we're done.  The solution: use a temporary tree to
 935  * hold the busy nodes until we're done destroying the old tree,
 936  * then copy the temporary tree over the (now emtpy) real tree.
 937  */
 938 void
 939 smbfs_destroy_table(struct vfs *vfsp)
 940 {
 941         avl_tree_t tmp_avl;
 942         smbmntinfo_t *mi;
 943         smbnode_t *np;
 944         smbnode_t *rlist;
 945         void *v;
 946 
 947         mi = VFTOSMI(vfsp);
 948         rlist = NULL;
 949         smbfs_init_hash_avl(&tmp_avl);
 950 
 951         rw_enter(&mi->smi_hash_lk, RW_WRITER);
 952         v = NULL;
 953         while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {
 954 
 955                 mutex_enter(&smbfreelist_lock);
 956                 if (np->r_freef == NULL) {
 957                         /*
 958                          * Busy node (not on the free list).
 959                          * Will keep in the final AVL tree.
 960                          */
 961                         mutex_exit(&smbfreelist_lock);
 962                         avl_add(&tmp_avl, np);
 963                 } else {
 964                         /*
 965                          * It's on the free list.  Remove and
 966                          * arrange for it to be destroyed.
 967                          */
 968                         sn_rmfree(np);
 969                         mutex_exit(&smbfreelist_lock);
 970 
 971                         /*
 972                          * Last part of sn_rmhash_locked().
 973                          * NB: avl_destroy_nodes has already
 974                          * removed this from the "hash" AVL.
 975                          */
 976                         mutex_enter(&np->r_statelock);
 977                         np->r_flags &= ~RHASHED;
 978                         mutex_exit(&np->r_statelock);
 979 
 980                         /*
 981                          * Add to the list of nodes to destroy.
 982                          * Borrowing avl_child[0] for this list.
 983                          */
 984                         np->r_avl_node.avl_child[0] =
 985                             (struct avl_node *)rlist;
 986                         rlist = np;
 987                 }
 988         }
 989         avl_destroy(&mi->smi_hash_avl);
 990 
 991         /*
 992          * Replace the (now destroyed) "hash" AVL with the
 993          * temporary AVL, which restores the busy nodes.
 994          */
 995         mi->smi_hash_avl = tmp_avl;
 996         rw_exit(&mi->smi_hash_lk);
 997 
 998         /*
 999          * Now destroy the nodes on our temporary list (rlist).
1000          * This call to smbfs_addfree will end up destroying the
1001          * smbnode, but in a safe way with the appropriate set
1002          * of checks done.
1003          */
1004         while ((np = rlist) != NULL) {
1005                 rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
1006                 smbfs_addfree(np);
1007         }
1008 }
1009 
1010 /*
1011  * This routine destroys all the resources associated with the smbnode
1012  * and then the smbnode itself.  Note: sn_inactive has been called.
1013  *
1014  * From NFS: nfs_subr.c:destroy_rnode
1015  */
1016 static void
1017 sn_destroy_node(smbnode_t *np)
1018 {
1019         vnode_t *vp;
1020         vfs_t *vfsp;
1021 
1022         vp = SMBTOV(np);
1023         vfsp = vp->v_vfsp;
1024 
1025         ASSERT(vp->v_count == 1);
1026         ASSERT(np->r_count == 0);
1027         ASSERT(np->r_mapcnt == 0);
1028         ASSERT(np->r_secattr.vsa_aclentp == NULL);
1029         ASSERT(np->r_cred == NULL);
1030         ASSERT(np->n_rpath == NULL);
1031         ASSERT(!(np->r_flags & RHASHED));
1032         ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1033         atomic_dec_ulong((ulong_t *)&smbnodenew);
1034         vn_invalid(vp);
1035         vn_free(vp);
1036         kmem_cache_free(smbnode_cache, np);
1037         VFS_RELE(vfsp);
1038 }
1039 
1040 /*
1041  * From NFS rflush()
1042  * Flush all vnodes in this (or every) vfs.
1043  * Used by smbfs_sync and by smbfs_unmount.
1044  */
1045 /*ARGSUSED*/
1046 void
1047 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1048 {
1049         smbmntinfo_t *mi;
1050         smbnode_t *np;
1051         vnode_t *vp, **vplist;
1052         long num, cnt;
1053 
1054         mi = VFTOSMI(vfsp);
1055 
1056         /*
1057          * Check to see whether there is anything to do.
1058          */
1059         num = avl_numnodes(&mi->smi_hash_avl);
1060         if (num == 0)
1061                 return;
1062 
1063         /*
1064          * Allocate a slot for all currently active rnodes on the
1065          * supposition that they all may need flushing.
1066          */
1067         vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1068         cnt = 0;
1069 
1070         /*
1071          * Walk the AVL tree looking for rnodes with page
1072          * lists associated with them.  Make a list of these
1073          * files.
1074          */
1075         rw_enter(&mi->smi_hash_lk, RW_READER);
1076         for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1077             np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1078                 vp = SMBTOV(np);
1079                 /*
1080                  * Don't bother sync'ing a vp if it
1081                  * is part of virtual swap device or
1082                  * if VFS is read-only
1083                  */
1084                 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1085                         continue;
1086                 /*
1087                  * If the vnode has pages and is marked as either
1088                  * dirty or mmap'd, hold and add this vnode to the
1089                  * list of vnodes to flush.
1090                  */
1091                 if (vn_has_cached_data(vp) &&
1092                     ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1093                         VN_HOLD(vp);
1094                         vplist[cnt++] = vp;
1095                         if (cnt == num)
1096                                 break;
1097                 }
1098         }
1099         rw_exit(&mi->smi_hash_lk);
1100 
1101         /*
1102          * Flush and release all of the files on the list.
1103          */
1104         while (cnt-- > 0) {
1105                 vp = vplist[cnt];
1106                 (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1107                 VN_RELE(vp);
1108         }
1109 
1110         kmem_free(vplist, num * sizeof (vnode_t *));
1111 }
1112 
1113 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1114 
1115 /*
1116  * Set or Clear direct I/O flag
1117  * VOP_RWLOCK() is held for write access to prevent a race condition
1118  * which would occur if a process is in the middle of a write when
1119  * directio flag gets set. It is possible that all pages may not get flushed.
1120  * From nfs_common.c
1121  */
1122 
1123 /* ARGSUSED */
1124 int
1125 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1126 {
1127         int     error = 0;
1128         smbnode_t       *np;
1129 
1130         np = VTOSMB(vp);
1131 
1132         if (cmd == DIRECTIO_ON) {
1133 
1134                 if (np->r_flags & RDIRECTIO)
1135                         return (0);
1136 
1137                 /*
1138                  * Flush the page cache.
1139                  */
1140 
1141                 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1142 
1143                 if (np->r_flags & RDIRECTIO) {
1144                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1145                         return (0);
1146                 }
1147 
1148                 /* Here NFS also checks ->r_awcount */
1149                 if (vn_has_cached_data(vp) &&
1150                     (np->r_flags & RDIRTY) != 0) {
1151                         error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1152                             B_INVAL, cr, NULL);
1153                         if (error) {
1154                                 if (error == ENOSPC || error == EDQUOT) {
1155                                         mutex_enter(&np->r_statelock);
1156                                         if (!np->r_error)
1157                                                 np->r_error = error;
1158                                         mutex_exit(&np->r_statelock);
1159                                 }
1160                                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1161                                 return (error);
1162                         }
1163                 }
1164 
1165                 mutex_enter(&np->r_statelock);
1166                 np->r_flags |= RDIRECTIO;
1167                 mutex_exit(&np->r_statelock);
1168                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1169                 return (0);
1170         }
1171 
1172         if (cmd == DIRECTIO_OFF) {
1173                 mutex_enter(&np->r_statelock);
1174                 np->r_flags &= ~RDIRECTIO;       /* disable direct mode */
1175                 mutex_exit(&np->r_statelock);
1176                 return (0);
1177         }
1178 
1179         return (EINVAL);
1180 }
1181 
1182 static kmutex_t smbfs_newnum_lock;
1183 static uint32_t smbfs_newnum_val = 0;
1184 
1185 /*
1186  * Return a number 0..0xffffffff that's different from the last
1187  * 0xffffffff numbers this returned.  Used for unlinked files.
1188  * From NFS nfs_subr.c newnum
1189  */
1190 uint32_t
1191 smbfs_newnum(void)
1192 {
1193         uint32_t id;
1194 
1195         mutex_enter(&smbfs_newnum_lock);
1196         if (smbfs_newnum_val == 0)
1197                 smbfs_newnum_val = (uint32_t)gethrestime_sec();
1198         id = smbfs_newnum_val++;
1199         mutex_exit(&smbfs_newnum_lock);
1200         return (id);
1201 }
1202 
1203 /*
1204  * Fill in a temporary name at buf
1205  */
1206 int
1207 smbfs_newname(char *buf, size_t buflen)
1208 {
1209         uint_t id;
1210         int n;
1211 
1212         id = smbfs_newnum();
1213         n = snprintf(buf, buflen, "~$smbfs%08X", id);
1214         return (n);
1215 }
1216 
1217 
1218 /*
1219  * initialize resources that are used by smbfs_subr.c
1220  * this is called from the _init() routine (by the way of smbfs_clntinit())
1221  *
1222  * From NFS: nfs_subr.c:nfs_subrinit
1223  */
1224 int
1225 smbfs_subrinit(void)
1226 {
1227         ulong_t nsmbnode_max;
1228 
1229         /*
1230          * Allocate and initialize the smbnode cache
1231          */
1232         if (nsmbnode <= 0)
1233                 nsmbnode = ncsize; /* dnlc.h */
1234         nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1235             sizeof (struct smbnode));
1236         if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1237                 cmn_err(CE_NOTE,
1238                     "setting nsmbnode to max value of %ld", nsmbnode_max);
1239                 nsmbnode = nsmbnode_max;
1240         }
1241 
1242         smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1243             0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1244 
1245         /*
1246          * Initialize the various mutexes and reader/writer locks
1247          */
1248         mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1249         mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1250 
1251         /*
1252          * Assign unique major number for all smbfs mounts
1253          */
1254         if ((smbfs_major = getudev()) == -1) {
1255                 cmn_err(CE_WARN,
1256                     "smbfs: init: can't get unique device number");
1257                 smbfs_major = 0;
1258         }
1259         smbfs_minor = 0;
1260 
1261         return (0);
1262 }
1263 
1264 /*
1265  * free smbfs hash table, etc.
1266  * From NFS: nfs_subr.c:nfs_subrfini
1267  */
1268 void
1269 smbfs_subrfini(void)
1270 {
1271 
1272         /*
1273          * Destroy the smbnode cache
1274          */
1275         kmem_cache_destroy(smbnode_cache);
1276 
1277         /*
1278          * Destroy the various mutexes and reader/writer locks
1279          */
1280         mutex_destroy(&smbfreelist_lock);
1281         mutex_destroy(&smbfs_minor_lock);
1282 }
1283 
1284 /* rddir_cache ? */
1285 
1286 /*
1287  * Support functions for smbfs_kmem_reclaim
1288  */
1289 
1290 static void
1291 smbfs_node_reclaim(void)
1292 {
1293         smbmntinfo_t *mi;
1294         smbnode_t *np;
1295         vnode_t *vp;
1296 
1297         mutex_enter(&smbfreelist_lock);
1298         while ((np = smbfreelist) != NULL) {
1299                 sn_rmfree(np);
1300                 mutex_exit(&smbfreelist_lock);
1301                 if (np->r_flags & RHASHED) {
1302                         vp = SMBTOV(np);
1303                         mi = np->n_mount;
1304                         rw_enter(&mi->smi_hash_lk, RW_WRITER);
1305                         mutex_enter(&vp->v_lock);
1306                         if (vp->v_count > 1) {
1307                                 VN_RELE_LOCKED(vp);
1308                                 mutex_exit(&vp->v_lock);
1309                                 rw_exit(&mi->smi_hash_lk);
1310                                 mutex_enter(&smbfreelist_lock);
1311                                 continue;
1312                         }
1313                         mutex_exit(&vp->v_lock);
1314                         sn_rmhash_locked(np);
1315                         rw_exit(&mi->smi_hash_lk);
1316                 }
1317                 /*
1318                  * This call to smbfs_addfree will end up destroying the
1319                  * smbnode, but in a safe way with the appropriate set
1320                  * of checks done.
1321                  */
1322                 smbfs_addfree(np);
1323                 mutex_enter(&smbfreelist_lock);
1324         }
1325         mutex_exit(&smbfreelist_lock);
1326 }
1327 
1328 /*
1329  * Called by kmem_cache_alloc ask us if we could
1330  * "Please give back some memory!"
1331  *
1332  * Todo: dump nodes from the free list?
1333  */
1334 /*ARGSUSED*/
1335 void
1336 smbfs_kmem_reclaim(void *cdrarg)
1337 {
1338         smbfs_node_reclaim();
1339 }
1340 
1341 /*
1342  * Here NFS has failover stuff and
1343  * nfs_rw_xxx - see smbfs_rwlock.c
1344  */