Print this page
    
Extra lint fixes
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/io/inotify.c
          +++ new/usr/src/uts/common/io/inotify.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  14   14   * Copyright (c) 2015 The MathWorks, Inc.  All rights reserved.
  15   15   */
  16   16  
  17   17  /*
  18   18   * Support for the inotify facility, a Linux-borne facility for asynchronous
  19   19   * notification of certain events on specified files or directories.  Our
  20   20   * implementation broadly leverages the file event monitoring facility, and
  21   21   * would actually be quite straightforward were it not for a very serious
  22   22   * blunder in the inotify interface:  in addition to allowing for one to be
  23   23   * notified on events on a particular file or directory, inotify also allows
  24   24   * for one to be notified on certain events on files _within_ a watched
  25   25   * directory -- even though those events have absolutely nothing to do with
  26   26   * the directory itself.  This leads to all sorts of madness because file
  27   27   * operations are (of course) not undertaken on paths but rather on open
  28   28   * files -- and the relationships between open files and the paths that resolve
  29   29   * to those files are neither static nor isomorphic.  We implement this
  30   30   * concept by having _child watches_ when directories are watched with events
  31   31   * in IN_CHILD_EVENTS.  We add child watches when a watch on a directory is
  32   32   * first added, and we modify those child watches dynamically as files are
  33   33   * created, deleted, moved into or moved out of the specified directory.  This
  34   34   * mechanism works well, absent hard links.  Hard links, unfortunately, break
  35   35   * this rather badly, and the user is warned that watches on directories that
  36   36   * have multiple directory entries referring to the same file may behave
  37   37   * unexpectedly.
  38   38   */
  39   39  
  40   40  #include <sys/ddi.h>
  41   41  #include <sys/sunddi.h>
  42   42  #include <sys/inotify.h>
  43   43  #include <sys/fem.h>
  44   44  #include <sys/conf.h>
  45   45  #include <sys/stat.h>
  46   46  #include <sys/vfs_opreg.h>
  47   47  #include <sys/vmem.h>
  48   48  #include <sys/avl.h>
  49   49  #include <sys/sysmacros.h>
  50   50  #include <sys/cyclic.h>
  51   51  #include <sys/filio.h>
  52   52  
struct inotify_state;
struct inotify_kevent;

typedef struct inotify_watch inotify_watch_t;
typedef struct inotify_state inotify_state_t;
typedef struct inotify_kevent inotify_kevent_t;

/*
 * An inotify watch on a single vnode.  A watch on a directory may carry
 * child watches (kept in inw_children, one per directory entry) for events
 * in IN_CHILD_EVENTS; a child watch points back at its parent via
 * inw_parent.
 */
struct inotify_watch {
	kmutex_t inw_lock;			/* lock protecting ref count */
	int inw_refcnt;				/* reference count */
	uint8_t inw_zombie:1;			/* boolean: is zombie */
	uint8_t inw_fired:1;			/* boolean: fired one-shot */
	uint8_t inw_active:1;			/* boolean: watch is active */
	uint8_t inw_orphaned:1;			/* boolean: orphaned */
	kcondvar_t inw_cv;			/* condvar for zombifier */
	uint32_t inw_mask;			/* mask of watch */
	int32_t inw_wd;				/* watch descriptor */
	vnode_t *inw_vp;			/* underlying vnode */
	inotify_watch_t *inw_parent;		/* parent, if a child */
	avl_node_t inw_byvp;			/* watches by vnode */
	avl_node_t inw_bywd;			/* watches by descriptor */
	avl_tree_t inw_children;		/* children, if a parent */
	char *inw_name;				/* name, if a child */
	list_node_t inw_orphan;			/* orphan list */
	cred_t *inw_cred;			/* cred, if orphaned */
	inotify_state_t *inw_state;		/* corresponding state */
};

/* A queued inotify event; ine_event's trailing name makes it variable size. */
struct inotify_kevent {
	inotify_kevent_t *ine_next;		/* next event in queue */
	struct inotify_event ine_event;		/* event (variable size) */
};

/* Total in-kernel size of a queued event, including its name payload. */
#define INOTIFY_EVENT_LENGTH(ev) \
	(sizeof (inotify_kevent_t) + (ev)->ine_event.len)

/* Per-open-instance state for the inotify pseudo-device. */
struct inotify_state {
	kmutex_t ins_lock;			/* lock protecting state */
	avl_tree_t ins_byvp;			/* watches by vnode */
	avl_tree_t ins_bywd;			/* watches by descriptor */
	vmem_t *ins_wds;			/* watch identifier arena */
	int ins_maxwatches;			/* maximum number of watches */
	int ins_maxevents;			/* maximum number of events */
	int ins_nevents;			/* current # of events */
	int32_t ins_size;			/* total size of events */
	inotify_kevent_t *ins_head;		/* head of event queue */
	inotify_kevent_t *ins_tail;		/* tail of event queue */
	pollhead_t ins_pollhd;			/* poll head */
	kcondvar_t ins_cv;			/* condvar for reading */
	list_t ins_orphans;			/* orphan list */
	ddi_periodic_t ins_cleaner;		/* cyclic for cleaning */
	inotify_watch_t *ins_zombies;		/* zombie watch list */
	cred_t *ins_cred;			/* creator's credentials */
	inotify_state_t *ins_next;		/* next state on global list */
};

/*
 * Tunables (exported read-only in lx-branded zones via /proc).
 */
int	inotify_maxwatches = 8192;		/* max watches per instance */
int	inotify_maxevents = 16384;		/* max events */
int	inotify_maxinstances = 128;		/* max instances per user */

/*
 * Internal global variables.
 */
static kmutex_t		inotify_lock;		/* lock protecting state */
static dev_info_t	*inotify_devi;		/* device info */
static fem_t		*inotify_femp;		/* FEM pointer */
static vmem_t		*inotify_minor;		/* minor number arena */
static void		*inotify_softstate;	/* softstate pointer */
static inotify_state_t	*inotify_state;		/* global list of state */

static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
static void inotify_watch_delete(inotify_watch_t *, uint32_t);
static void inotify_watch_remove(inotify_state_t *state,
	inotify_watch_t *watch);
 131  131  
 132  132  static int
 133  133  inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
 134  134      cred_t *cr, caller_context_t *ct)
 135  135  {
 136  136          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 137  137          int rval;
 138  138  
 139  139          if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
 140  140                  inotify_watch_event(watch, flag & FWRITE ?
 141  141                      IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
 142  142          }
 143  143  
 144  144          return (rval);
 145  145  }
 146  146  
 147  147  static int
 148  148  inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
 149  149      int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
 150  150      vsecattr_t *vsecp)
 151  151  {
 152  152          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 153  153          int rval;
 154  154  
 155  155          if ((rval = vnext_create(vf, name, vap, excl, mode,
 156  156              vpp, cr, flag, ct, vsecp)) == 0) {
 157  157                  inotify_watch_insert(watch, *vpp, name);
 158  158                  inotify_watch_event(watch, IN_CREATE, name);
 159  159          }
 160  160  
 161  161          return (rval);
 162  162  }
 163  163  
 164  164  static int
 165  165  inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
 166  166      caller_context_t *ct, int flags)
 167  167  {
 168  168          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 169  169          int rval;
 170  170  
 171  171          if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
 172  172                  inotify_watch_insert(watch, svp, tnm);
 173  173                  inotify_watch_event(watch, IN_CREATE, tnm);
 174  174          }
 175  175  
 176  176          return (rval);
 177  177  }
 178  178  
 179  179  static int
 180  180  inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
 181  181      cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
 182  182  {
 183  183          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 184  184          int rval;
 185  185  
 186  186          if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
 187  187              ct, flags, vsecp)) == 0) {
 188  188                  inotify_watch_insert(watch, *vpp, name);
 189  189                  inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
 190  190          }
 191  191  
 192  192          return (rval);
 193  193  }
 194  194  
 195  195  static int
 196  196  inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
 197  197  {
 198  198          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 199  199          int rval;
 200  200  
 201  201          if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
 202  202                  inotify_watch_event(watch, IN_OPEN, NULL);
 203  203  
 204  204          return (rval);
 205  205  }
 206  206  
 207  207  static int
 208  208  inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
 209  209      caller_context_t *ct)
 210  210  {
 211  211          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 212  212          int rval = vnext_read(vf, uiop, ioflag, cr, ct);
 213  213          inotify_watch_event(watch, IN_ACCESS, NULL);
 214  214  
 215  215          return (rval);
 216  216  }
 217  217  
 218  218  static int
 219  219  inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
 220  220      caller_context_t *ct, int flags)
 221  221  {
 222  222          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 223  223          int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
 224  224          inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
 225  225  
 226  226          return (rval);
 227  227  }
 228  228  
 229  229  int
 230  230  inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
 231  231      int flags)
 232  232  {
 233  233          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 234  234          int rval;
 235  235  
 236  236          if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
 237  237                  inotify_watch_event(watch, IN_DELETE, nm);
 238  238  
 239  239          return (rval);
 240  240  }
 241  241  
 242  242  int
 243  243  inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
 244  244      caller_context_t *ct, int flags)
 245  245  {
 246  246          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 247  247          int rval;
 248  248  
 249  249          if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
 250  250                  inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
 251  251  
 252  252          return (rval);
 253  253  }
 254  254  
 255  255  static int
 256  256  inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
 257  257      caller_context_t *ct)
 258  258  {
 259  259          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 260  260          int rval;
 261  261  
 262  262          if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
 263  263                  inotify_watch_event(watch, IN_ATTRIB, NULL);
 264  264  
 265  265          return (rval);
 266  266  }
 267  267  
 268  268  static int
 269  269  inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
 270  270      caller_context_t *ct)
 271  271  {
 272  272          inotify_watch_t *watch = vf->fa_fnode->fn_available;
 273  273          int rval = vnext_write(vf, uiop, ioflag, cr, ct);
 274  274          inotify_watch_event(watch, IN_MODIFY, NULL);
 275  275  
 276  276          return (rval);
 277  277  }
 278  278  
/*
 * FEM monitor for vnode events (VOP_VNEVENT()):  translates rename,
 * remove and similar vnode-level events into the corresponding inotify
 * events, deleting the watch when its underlying file goes away.
 */
static int
inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	inotify_watch_t *watch = vf->fa_fnode->fn_available;

	switch (vnevent) {
	case VE_RENAME_SRC:
		/* The watched file itself is being renamed away. */
		inotify_watch_event(watch, IN_MOVE_SELF, NULL);
		inotify_watch_delete(watch, IN_MOVE_SELF);
		break;
	case VE_REMOVE:
		/*
		 * Linux will apparently fire an IN_ATTRIB event when the link
		 * count changes (including when it drops to 0 on a remove).
		 * This is merely somewhat odd; what is amazing is that this
		 * IN_ATTRIB event is not visible on an inotify watch on the
		 * parent directory.  (IN_ATTRIB events are normally sent to
		 * watches on the parent directory).  While it's hard to
		 * believe that this constitutes desired semantics, ltp
		 * unfortunately tests this case (if implicitly); in the name
		 * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
		 * explicitly watching the file that has been removed.
		 */
		if (watch->inw_parent == NULL)
			inotify_watch_event(watch, IN_ATTRIB, NULL);

		/*FALLTHROUGH*/
	case VE_RENAME_DEST:
		/* The watched file is gone; the watch goes with it. */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_RMDIR:
		/*
		 * It seems that IN_ISDIR should really be OR'd in here, but
		 * Linux doesn't seem to do that in this case; for the sake of
		 * bug-for-bug compatibility, we don't do it either.
		 */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_CREATE:
	case VE_TRUNCATE:
	case VE_RESIZE:
		inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
		break;
	case VE_LINK:
		inotify_watch_event(watch, IN_ATTRIB, NULL);
		break;
	case VE_RENAME_SRC_DIR:
		/* A child of the watched directory is being renamed away. */
		inotify_watch_event(watch, IN_MOVED_FROM, name);
		break;
	case VE_RENAME_DEST_DIR:
		/* A child is being renamed into the watched directory. */
		if (name == NULL)
			name = dvp->v_path;

		inotify_watch_insert(watch, dvp, name);
		inotify_watch_event(watch, IN_MOVED_TO, name);
		break;
	case VE_SUPPORT:
	case VE_MOUNTEDOVER:
	case VE_PRE_RENAME_SRC:
	case VE_PRE_RENAME_DEST:
	case VE_PRE_RENAME_DEST_DIR:
		/* No inotify analogue for these events. */
		break;
	}

	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
}
 348  348  
/*
 * FEM template installed on each watched vnode:  these monitors map vnode
 * operations (and vnode events) onto inotify events.
 */
const fs_operation_def_t inotify_vnodesrc_template[] = {
	VOPNAME_CLOSE,		{ .femop_close = inotify_fop_close },
	VOPNAME_CREATE,		{ .femop_create = inotify_fop_create },
	VOPNAME_LINK,		{ .femop_link = inotify_fop_link },
	VOPNAME_MKDIR,		{ .femop_mkdir = inotify_fop_mkdir },
	VOPNAME_OPEN,		{ .femop_open = inotify_fop_open },
	VOPNAME_READ,		{ .femop_read = inotify_fop_read },
	VOPNAME_READDIR,	{ .femop_readdir = inotify_fop_readdir },
	VOPNAME_REMOVE,		{ .femop_remove = inotify_fop_remove },
	VOPNAME_RMDIR,		{ .femop_rmdir = inotify_fop_rmdir },
	VOPNAME_SETATTR,	{ .femop_setattr = inotify_fop_setattr },
	VOPNAME_WRITE,		{ .femop_write = inotify_fop_write },
	VOPNAME_VNEVENT,	{ .femop_vnevent = inotify_fop_vnevent },
	NULL, NULL
};
 364  364  
 365  365  static int
 366  366  inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
 367  367  {
 368  368          if (lhs->inw_wd < rhs->inw_wd)
 369  369                  return (-1);
 370  370  
 371  371          if (lhs->inw_wd > rhs->inw_wd)
 372  372                  return (1);
 373  373  
 374  374          return (0);
 375  375  }
 376  376  
 377  377  static int
 378  378  inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
 379  379  {
 380  380          uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
 381  381  
 382  382          if (lvp < rvp)
 383  383                  return (-1);
 384  384  
 385  385          if (lvp > rvp)
 386  386                  return (1);
 387  387  
 388  388          return (0);
 389  389  }
 390  390  
/*
 * Take an additional hold on a watch.  The caller must already hold a
 * reference, so the count is verified to be non-zero before bumping it.
 */
static void
inotify_watch_hold(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 0);
	watch->inw_refcnt++;
	mutex_exit(&watch->inw_lock);
}
 399  399  
/*
 * Release a hold on a watch.  The count must remain above 1 here:  the
 * final reference belongs to the state's data structures (or the zombie
 * list) and is dropped elsewhere.  When a zombie watch comes down to that
 * last reference, signal whoever is waiting to destroy it.
 */
static void
inotify_watch_release(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 1);

	if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
		/*
		 * We're down to our last reference; kick anyone that might be
		 * waiting.
		 */
		cv_signal(&watch->inw_cv);
	}

	mutex_exit(&watch->inw_lock);
}
 416  416  
 417  417  static void
 418  418  inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
 419  419  {
 420  420          inotify_kevent_t *event, *tail;
 421  421          inotify_state_t *state = watch->inw_state;
 422  422          uint32_t wd = watch->inw_wd, cookie = 0, len;
 423  423          boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
 424  424          inotify_watch_t *source = watch;
 425  425  
 426  426          if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
 427  427                  return;
 428  428  
 429  429          if (watch->inw_parent != NULL) {
 430  430                  /*
 431  431                   * This is an event on the child; if this isn't a valid child
 432  432                   * event, return.  Otherwise, we move our watch to be our
 433  433                   * parent (which we know is around because we have a hold on
 434  434                   * it) and continue.
 435  435                   */
 436  436                  if (!(mask & IN_CHILD_EVENTS))
 437  437                          return;
 438  438  
 439  439                  name = watch->inw_name;
 440  440                  watch = watch->inw_parent;
 441  441                  wd = watch->inw_wd;
 442  442          }
 443  443  
 444  444          if (!removal) {
 445  445                  mutex_enter(&state->ins_lock);
 446  446  
 447  447                  if (watch->inw_zombie ||
 448  448                      watch->inw_fired || !watch->inw_active) {
 449  449                          mutex_exit(&state->ins_lock);
 450  450                          return;
 451  451                  }
 452  452          } else {
 453  453                  if (!watch->inw_active)
 454  454                          return;
 455  455  
 456  456                  VERIFY(MUTEX_HELD(&state->ins_lock));
 457  457          }
 458  458  
 459  459          /*
 460  460           * If this is an operation on a directory and it's a child event
 461  461           * (event if it's not on a child), we specify IN_ISDIR.
 462  462           */
 463  463          if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
 464  464                  mask |= IN_ISDIR;
 465  465  
 466  466          if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
 467  467                  cookie = (uint32_t)curthread->t_did;
 468  468  
 469  469          if (state->ins_nevents >= state->ins_maxevents) {
 470  470                  /*
 471  471                   * We're at our maximum number of events -- turn our event
 472  472                   * into an IN_Q_OVERFLOW event, which will be coalesced if
 473  473                   * it's already the tail event.
 474  474                   */
 475  475                  mask = IN_Q_OVERFLOW;
 476  476                  wd = (uint32_t)-1;
 477  477                  cookie = 0;
 478  478                  len = 0;
 479  479          }
 480  480  
 481  481          if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
 482  482              tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
 483  483              ((tail->ine_event.len == 0 && len == 0) ||
 484  484              (name != NULL && tail->ine_event.len != 0 &&
 485  485              strcmp(tail->ine_event.name, name) == 0))) {
 486  486                  /*
 487  487                   * This is an implicitly coalesced event; we're done.
 488  488                   */
 489  489                  if (!removal)
 490  490                          mutex_exit(&state->ins_lock);
 491  491                  return;
 492  492          }
 493  493  
 494  494          if (name != NULL) {
 495  495                  len = strlen(name) + 1;
 496  496                  len = roundup(len, sizeof (struct inotify_event));
 497  497          } else {
 498  498                  len = 0;
 499  499          }
 500  500  
 501  501          event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
 502  502          event->ine_event.wd = wd;
 503  503          event->ine_event.mask = (uint32_t)mask;
 504  504          event->ine_event.cookie = cookie;
 505  505          event->ine_event.len = len;
 506  506  
 507  507          if (name != NULL)
 508  508                  (void) strcpy(event->ine_event.name, name);
 509  509  
 510  510          if (tail != NULL) {
 511  511                  tail->ine_next = event;
 512  512          } else {
 513  513                  VERIFY(state->ins_head == NULL);
 514  514                  state->ins_head = event;
 515  515                  cv_broadcast(&state->ins_cv);
 516  516          }
 517  517  
 518  518          state->ins_tail = event;
 519  519          state->ins_nevents++;
 520  520          state->ins_size += sizeof (event->ine_event) + len;
 521  521  
 522  522          if (removal)
 523  523                  return;
 524  524  
 525  525          if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
 526  526                  /*
 527  527                   * If this is a one-shot, we need to remove the watch.  (Note
 528  528                   * that this will recurse back into inotify_watch_event() to
 529  529                   * fire the IN_IGNORED event -- but with "removal" set.)
 530  530                   */
 531  531                  watch->inw_fired = 1;
 532  532                  inotify_watch_remove(state, watch);
 533  533          }
 534  534  
 535  535          mutex_exit(&state->ins_lock);
 536  536          pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
 537  537  }
 538  538  
/*
 * Destroy a watch.  By the time we're in here, the watch must have exactly
 * one reference.
 */
static void
inotify_watch_destroy(inotify_watch_t *watch)
{
	VERIFY(MUTEX_HELD(&watch->inw_lock));

	/*
	 * NOTE(review):  the watch is freed while inw_lock is still held and
	 * without mutex_destroy()/cv_destroy(); presumably safe because no
	 * one else can reach the watch at this point -- confirm.
	 */
	if (watch->inw_name != NULL)
		kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);

	kmem_free(watch, sizeof (inotify_watch_t));
}
 553  553  
/*
 * Zombify a watch.  By the time we come in here, it must be true that the
 * watch has already been fem_uninstall()'d -- the only reference should be
 * in the state's data structure.  If we can get away with freeing it, we'll
 * do that -- but if the reference count is greater than one due to an active
 * vnode operation, we'll put this watch on the zombie list on the state
 * structure.
 */
static void
inotify_watch_zombify(inotify_watch_t *watch)
{
	inotify_state_t *state = watch->inw_state;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(!watch->inw_zombie);

	watch->inw_zombie = 1;

	if (watch->inw_parent != NULL) {
		/* A child watch:  drop the hold it had on its parent. */
		inotify_watch_release(watch->inw_parent);
	} else {
		/* A parent watch:  unhook it and retire its descriptor. */
		avl_remove(&state->ins_byvp, watch);
		avl_remove(&state->ins_bywd, watch);
		vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
		watch->inw_wd = -1;
	}

	mutex_enter(&watch->inw_lock);

	if (watch->inw_refcnt == 1) {
		/*
		 * There are no operations in flight and there is no way
		 * for anyone to discover this watch -- we can destroy it.
		 */
		inotify_watch_destroy(watch);
	} else {
		/*
		 * There are operations in flight; we will need to enqueue
		 * this for later destruction.  (inw_parent is no longer
		 * meaningful on a zombie, so it is reused here as the
		 * zombie-list linkage.)
		 */
		watch->inw_parent = state->ins_zombies;
		state->ins_zombies = watch;
		mutex_exit(&watch->inw_lock);
	}
}
 599  599  
 600  600  static inotify_watch_t *
 601  601  inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
 602  602      const char *name, vnode_t *vp, uint32_t mask)
 603  603  {
 604  604          inotify_watch_t *watch;
 605  605          int err;
 606  606  
 607  607          VERIFY(MUTEX_HELD(&state->ins_lock));
 608  608  
 609  609          watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
 610  610  
 611  611          watch->inw_vp = vp;
 612  612          watch->inw_mask = mask;
 613  613          watch->inw_state = state;
 614  614          watch->inw_refcnt = 1;
 615  615  
 616  616          if (parent == NULL) {
 617  617                  watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
 618  618                      1, VM_BESTFIT | VM_SLEEP);
 619  619                  avl_add(&state->ins_byvp, watch);
 620  620                  avl_add(&state->ins_bywd, watch);
 621  621  
  
    | 
      ↓ open down ↓ | 
    621 lines elided | 
    
      ↑ open up ↑ | 
  
 622  622                  avl_create(&watch->inw_children,
 623  623                      (int(*)(const void *, const void *))inotify_watch_cmpvp,
 624  624                      sizeof (inotify_watch_t),
 625  625                      offsetof(inotify_watch_t, inw_byvp));
 626  626          } else {
 627  627                  VERIFY(name != NULL);
 628  628                  inotify_watch_hold(parent);
 629  629                  watch->inw_mask &= IN_CHILD_EVENTS;
 630  630                  watch->inw_parent = parent;
 631  631                  watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
 632      -                strcpy(watch->inw_name, name);
      632 +                /* strcpy() is safe, because strlen(name) bounds us. */
      633 +                (void) strcpy(watch->inw_name, name);
 633  634  
 634  635                  avl_add(&parent->inw_children, watch);
 635  636          }
 636  637  
 637  638          /*
 638  639           * Add our monitor to the vnode.  We must not have the watch lock held
 639  640           * when we do this, as it will immediately hold our watch.
 640  641           */
 641  642          err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
 642  643              (void (*)(void *))inotify_watch_hold,
 643  644              (void (*)(void *))inotify_watch_release);
 644  645  
 645  646          VERIFY(err == 0);
 646  647  
 647  648          return (watch);
 648  649  }
 649  650  
/*
 * Remove a (non-child) watch.  This is called from either synchronous context
 * via inotify_rm_watch() or monitor context via either a vnevent or a
 * one-shot.
 */
static void
inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
{
	inotify_watch_t *child;
	int err;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(watch->inw_parent == NULL);

	/* Stop monitoring the watched vnode before tearing anything down. */
	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	/*
	 * If we have children, we're going to remove them all and set them
	 * all to be zombies.
	 */
	while ((child = avl_first(&watch->inw_children)) != NULL) {
		VERIFY(child->inw_parent == watch);
		avl_remove(&watch->inw_children, child);

		err = fem_uninstall(child->inw_vp, inotify_femp, child);
		VERIFY(err == 0);

		/*
		 * If this child watch has been orphaned, remove it from the
		 * state's list of orphans.
		 */
		if (child->inw_orphaned) {
			list_remove(&state->ins_orphans, child);
			crfree(child->inw_cred);
		}

		VN_RELE(child->inw_vp);

		/*
		 * We're down (or should be down) to a single reference to
		 * this child watch; it's safe to zombify it.
		 */
		inotify_watch_zombify(child);
	}

	/*
	 * Fire IN_IGNORED for the watch itself; IN_REMOVAL tells
	 * inotify_watch_event() that ins_lock is already held.
	 */
	inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- we know that the only reference
	 * can come from operations in flight.
	 */
	inotify_watch_zombify(watch);
}
 705  706  
/*
 * Delete a watch.  Should only be called from VOP context.
 */
static void
inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
{
	inotify_state_t *state = watch->inw_state;
	inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
	int err;

	/*
	 * Unless we're being deleted outright, there is nothing to do if
	 * this watch was never interested in child events.
	 */
	if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
		return;

	mutex_enter(&state->ins_lock);

	/* A zombie watch has already been torn down; nothing more to do. */
	if (watch->inw_zombie) {
		mutex_exit(&state->ins_lock);
		return;
	}

	if ((parent = watch->inw_parent) == NULL) {
		if (event == IN_DELETE_SELF) {
			/*
			 * If we're here because we're being deleted and we
			 * are not a child watch, we need to delete the entire
			 * watch, children and all.
			 */
			inotify_watch_remove(state, watch);
		}

		mutex_exit(&state->ins_lock);
		return;
	} else {
		if (event == IN_DELETE_SELF &&
		    !(parent->inw_mask & IN_EXCL_UNLINK)) {
			/*
			 * This is a child watch for a file that is being
			 * removed and IN_EXCL_UNLINK has not been specified;
			 * indicate that it is orphaned and add it to the list
			 * of orphans.  (This list will be checked by the
			 * cleaning cyclic to determine when the watch has
			 * become the only hold on the vnode, at which point
			 * the watch can be zombified.)  Note that we check
			 * if the watch is orphaned before we orphan it:  hard
			 * links make it possible for VE_REMOVE to be called
			 * multiple times on the same vnode. (!)
			 */
			if (!watch->inw_orphaned) {
				watch->inw_orphaned = 1;
				watch->inw_cred = CRED();
				crhold(watch->inw_cred);
				list_insert_head(&state->ins_orphans, watch);
			}

			mutex_exit(&state->ins_lock);
			return;
		}

		if (watch->inw_orphaned) {
			/*
			 * If we're here, a file was orphaned and then later
			 * moved -- which almost certainly means that hard
			 * links are on the scene.  We choose the orphan over
			 * the move because we don't want to spuriously
			 * drop events if we can avoid it.
			 */
			crfree(watch->inw_cred);
			list_remove(&state->ins_orphans, watch);
		}
	}

	if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
		/*
		 * This watch has already been deleted from the parent.
		 */
		mutex_exit(&state->ins_lock);
		return;
	}

	/* Detach the child from its parent and drop the vnode hold. */
	avl_remove(&parent->inw_children, watch);
	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- which won't actually delete
	 * it as we know that the reference count is greater than 1.
	 */
	inotify_watch_zombify(watch);
	mutex_exit(&state->ins_lock);
}
 798  799  
/*
 * Insert a new child watch.  Should only be called from VOP context when
 * a child is created in a watched directory.
 */
static void
inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
{
	inotify_state_t *state = watch->inw_state;
	inotify_watch_t cmp = { .inw_vp = vp };

	/* Only directories watched for child events grow child watches. */
	if (!(watch->inw_mask & IN_CHILD_EVENTS))
		return;

	mutex_enter(&state->ins_lock);

	/*
	 * Bail if the watch died under us, if it is itself a child watch
	 * (children don't nest), or if the caller had no vnode to give us.
	 */
	if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
		mutex_exit(&state->ins_lock);
		return;
	}

	/* Already watching this child vnode; nothing to do. */
	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		return;
	}

	/* Hold the vnode for the lifetime of the new child watch. */
	VN_HOLD(vp);
	watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
	VERIFY(watch != NULL);

	mutex_exit(&state->ins_lock);
}
 830  831  
 831  832  
/*
 * Add (or update) a watch for the specified vnode, returning the watch
 * descriptor via wdp.  Returns 0 on success, ENOSPC if the per-instance
 * watch limit has been reached.
 */
static int
inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
    int32_t *wdp)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	uint32_t set;

	/* Restrict the mask to known event and modifier bits. */
	set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;

	/*
	 * Lookup our vnode to determine if we already have a watch on it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * We don't have this watch; allocate a new one, provided that
		 * we have fewer than our limit.
		 */
		if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
			mutex_exit(&state->ins_lock);
			return (ENOSPC);
		}

		/* Hold the vnode for the lifetime of the watch. */
		VN_HOLD(vp);
		watch = inotify_watch_add(state, NULL, NULL, vp, set);
		*wdp = watch->inw_wd;
		mutex_exit(&state->ins_lock);

		return (0);
	}

	VERIFY(!watch->inw_zombie);

	if (!(mask & IN_MASK_ADD)) {
		/*
		 * Note that if we're resetting our event mask and we're
		 * transitioning from an event mask that includes child events
		 * to one that doesn't, there will be potentially some stale
		 * child watches.  This is basically fine:  they won't fire,
		 * and they will correctly be removed when the watch is
		 * removed.
		 */
		watch->inw_mask = 0;
	}

	watch->inw_mask |= set;

	*wdp = watch->inw_wd;

	mutex_exit(&state->ins_lock);

	return (0);
}
 886  887  
/*
 * Explicitly add a child watch for the named entry beneath the watched
 * directory vp.  Returns 0 on success (or if the child is already watched),
 * EINVAL for names containing a path separator, ENXIO if vp is unexpectedly
 * unwatched, or a lookup error.
 */
static int
inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	vnode_t *cvp;
	int err;

	/*
	 * Verify that the specified child doesn't have a directory component
	 * within it.
	 */
	if (strchr(name, '/') != NULL)
		return (EINVAL);

	/*
	 * Lookup the underlying file.  Note that this will succeed even if
	 * we don't have permissions to actually read the file.
	 */
	if ((err = lookupnameat(name,
	    UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
		return (err);
	}

	/*
	 * Use our vnode to find our watch, and then add our child watch to it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * This is unexpected -- it means that we don't have the
		 * watch that we thought we had.
		 */
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (ENXIO);
	}

	/*
	 * Now lookup the child vnode in the watch; we'll only add it if it
	 * isn't already there.
	 */
	cmp.inw_vp = cvp;

	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (0);
	}

	/*
	 * The hold from lookupnameat() is transferred to the new child
	 * watch; inotify_watch_add() cannot fail here.
	 */
	watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
	VERIFY(watch != NULL);
	mutex_exit(&state->ins_lock);

	return (0);
}
 943  944  
 944  945  static int
 945  946  inotify_rm_watch(inotify_state_t *state, int32_t wd)
 946  947  {
 947  948          inotify_watch_t *watch, cmp = { .inw_wd = wd };
 948  949  
 949  950          mutex_enter(&state->ins_lock);
 950  951  
 951  952          if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
 952  953                  mutex_exit(&state->ins_lock);
 953  954                  return (EINVAL);
 954  955          }
 955  956  
 956  957          inotify_watch_remove(state, watch);
 957  958          mutex_exit(&state->ins_lock);
 958  959  
 959  960          return (0);
 960  961  }
 961  962  
 962  963  static int
 963  964  inotify_activate(inotify_state_t *state, int32_t wd)
 964  965  {
 965  966          inotify_watch_t *watch, cmp = { .inw_wd = wd };
 966  967  
 967  968          mutex_enter(&state->ins_lock);
 968  969  
 969  970          if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
 970  971                  mutex_exit(&state->ins_lock);
 971  972                  return (EINVAL);
 972  973          }
 973  974  
 974  975          watch->inw_active = 1;
 975  976  
 976  977          mutex_exit(&state->ins_lock);
 977  978  
 978  979          return (0);
 979  980  }
 980  981  
/*
 * Called periodically as a cyclic to process the orphans and zombies.
 */
static void
inotify_clean(void *arg)
{
	inotify_state_t *state = arg;
	inotify_watch_t *watch, *parent, *next, **prev;
	cred_t *savecred;
	int err;

	mutex_enter(&state->ins_lock);

	/*
	 * First pass:  zombify any orphaned watch whose vnode hold is now
	 * the sole reference (v_count == 1), meaning the file is truly gone.
	 */
	for (watch = list_head(&state->ins_orphans);
	    watch != NULL; watch = next) {
		next = list_next(&state->ins_orphans, watch);

		VERIFY(!watch->inw_zombie);
		VERIFY((parent = watch->inw_parent) != NULL);

		/* Someone else still holds the vnode; check again later. */
		if (watch->inw_vp->v_count > 1)
			continue;

		avl_remove(&parent->inw_children, watch);
		err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
		VERIFY(err == 0);

		list_remove(&state->ins_orphans, watch);

		/*
		 * For purposes of releasing the vnode, we need to switch our
		 * cred to be the cred of the orphaning thread (which we held
		 * at the time this watch was orphaned).
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = watch->inw_cred;
		VN_RELE(watch->inw_vp);
		crfree(watch->inw_cred);
		curthread->t_cred = savecred;

		inotify_watch_zombify(watch);
	}

	/*
	 * Second pass:  destroy any zombie whose reference count has dropped
	 * to one.  The zombie list is singly linked through inw_parent.
	 */
	prev = &state->ins_zombies;

	while ((watch = *prev) != NULL) {
		mutex_enter(&watch->inw_lock);

		if (watch->inw_refcnt == 1) {
			/*
			 * inotify_watch_destroy() frees the watch (and its
			 * lock), so unlink it first and don't drop inw_lock.
			 */
			*prev = watch->inw_parent;
			inotify_watch_destroy(watch);
			continue;
		}

		prev = &watch->inw_parent;
		mutex_exit(&watch->inw_lock);
	}

	mutex_exit(&state->ins_lock);
}
1041 1042  
1042 1043  /*ARGSUSED*/
1043 1044  static int
1044 1045  inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
1045 1046  {
1046 1047          inotify_state_t *state;
1047 1048          major_t major = getemajor(*devp);
1048 1049          minor_t minor = getminor(*devp);
1049 1050          int instances = 0;
1050 1051          char c[64];
1051 1052  
1052 1053          if (minor != INOTIFYMNRN_INOTIFY)
1053 1054                  return (ENXIO);
1054 1055  
1055 1056          mutex_enter(&inotify_lock);
1056 1057  
1057 1058          for (state = inotify_state; state != NULL; state = state->ins_next) {
1058 1059                  if (state->ins_cred == cred_p)
1059 1060                          instances++;
1060 1061          }
1061 1062  
1062 1063          if (instances >= inotify_maxinstances) {
1063 1064                  mutex_exit(&inotify_lock);
1064 1065                  return (EMFILE);
1065 1066          }
1066 1067  
1067 1068          minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
1068 1069              VM_BESTFIT | VM_SLEEP);
1069 1070  
1070 1071          if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
1071 1072                  vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1072 1073                  mutex_exit(&inotify_lock);
1073 1074                  return (NULL);
1074 1075          }
1075 1076  
1076 1077          state = ddi_get_soft_state(inotify_softstate, minor);
1077 1078          *devp = makedevice(major, minor);
1078 1079  
1079 1080          crhold(cred_p);
1080 1081          state->ins_cred = cred_p;
1081 1082          state->ins_next = inotify_state;
1082 1083          inotify_state = state;
1083 1084  
1084 1085          (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
1085 1086          state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
1086 1087              NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
1087 1088  
1088 1089          avl_create(&state->ins_bywd,
1089 1090              (int(*)(const void *, const void *))inotify_watch_cmpwd,
1090 1091              sizeof (inotify_watch_t),
1091 1092              offsetof(inotify_watch_t, inw_bywd));
1092 1093  
1093 1094          avl_create(&state->ins_byvp,
1094 1095              (int(*)(const void *, const void *))inotify_watch_cmpvp,
1095 1096              sizeof (inotify_watch_t),
1096 1097              offsetof(inotify_watch_t, inw_byvp));
1097 1098  
1098 1099          list_create(&state->ins_orphans, sizeof (inotify_watch_t),
1099 1100              offsetof(inotify_watch_t, inw_orphan));
1100 1101  
1101 1102          state->ins_maxwatches = inotify_maxwatches;
1102 1103          state->ins_maxevents = inotify_maxevents;
1103 1104  
1104 1105          mutex_exit(&inotify_lock);
1105 1106  
1106 1107          state->ins_cleaner = ddi_periodic_add(inotify_clean,
1107 1108              state, NANOSEC, DDI_IPL_0);
1108 1109  
1109 1110          return (0);
1110 1111  }
1111 1112  
/*ARGSUSED*/
/*
 * Read pending events into the caller's buffer.  Blocks until at least one
 * event is available unless the descriptor is non-blocking (EAGAIN).
 * Returns EINVAL if the buffer cannot hold even a single event, EINTR if
 * the wait was interrupted, or 0 with as many whole events as fit.
 */
static int
inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	inotify_state_t *state;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);
	int err = 0, nevents = 0;
	size_t len;

	state = ddi_get_soft_state(inotify_softstate, minor);

	mutex_enter(&state->ins_lock);

	while (state->ins_head == NULL) {
		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
			mutex_exit(&state->ins_lock);
			return (EAGAIN);
		}

		if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
			mutex_exit(&state->ins_lock);
			return (EINTR);
		}
	}

	/*
	 * We have events and we have our lock; return as many as we can.
	 */
	while ((event = state->ins_head) != NULL) {
		/* Events are variable-length: header plus the name payload. */
		len = sizeof (event->ine_event) + event->ine_event.len;

		if (uio->uio_resid < len) {
			/*
			 * Events are never split:  if even the first one
			 * doesn't fit, the caller's buffer is too small.
			 */
			if (nevents == 0)
				err = EINVAL;
			break;
		}

		nevents++;

		if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
			break;

		VERIFY(state->ins_nevents > 0);
		state->ins_nevents--;

		VERIFY(state->ins_size > 0);
		state->ins_size -= len;

		/* If we just consumed the last event, clear the tail too. */
		if ((state->ins_head = event->ine_next) == NULL) {
			VERIFY(event == state->ins_tail);
			VERIFY(state->ins_nevents == 0);
			state->ins_tail = NULL;
		}

		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	mutex_exit(&state->ins_lock);

	return (err);
}
1174 1175  
1175 1176  /*ARGSUSED*/
1176 1177  static int
1177 1178  inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
1178 1179      struct pollhead **phpp)
1179 1180  {
1180 1181          inotify_state_t *state;
1181 1182          minor_t minor = getminor(dev);
1182 1183  
1183 1184          state = ddi_get_soft_state(inotify_softstate, minor);
1184 1185  
1185 1186          mutex_enter(&state->ins_lock);
1186 1187  
1187 1188          if (state->ins_head != NULL) {
1188 1189                  *reventsp = events & (POLLRDNORM | POLLIN);
1189 1190          } else {
1190 1191                  *reventsp = 0;
1191 1192  
1192 1193                  if (!anyyet)
1193 1194                          *phpp = &state->ins_pollhd;
1194 1195          }
1195 1196  
1196 1197          mutex_exit(&state->ins_lock);
1197 1198  
1198 1199          return (0);
1199 1200  }
1200 1201  
1201 1202  /*ARGSUSED*/
1202 1203  static int
1203 1204  inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
1204 1205  {
1205 1206          inotify_state_t *state;
1206 1207          minor_t minor = getminor(dev);
1207 1208          file_t *fp;
1208 1209          int rval;
1209 1210  
1210 1211          state = ddi_get_soft_state(inotify_softstate, minor);
1211 1212  
1212 1213          switch (cmd) {
1213 1214          case INOTIFYIOC_ADD_WATCH: {
1214 1215                  inotify_addwatch_t addwatch;
1215 1216                  file_t *fp;
1216 1217  
1217 1218                  if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
1218 1219                          return (EFAULT);
1219 1220  
1220 1221                  if ((fp = getf(addwatch.inaw_fd)) == NULL)
1221 1222                          return (EBADF);
1222 1223  
1223 1224                  rval = inotify_add_watch(state, fp->f_vnode,
1224 1225                      addwatch.inaw_mask, rv);
1225 1226  
1226 1227                  releasef(addwatch.inaw_fd);
1227 1228                  return (rval);
1228 1229          }
1229 1230  
1230 1231          case INOTIFYIOC_ADD_CHILD: {
1231 1232                  inotify_addchild_t addchild;
1232 1233                  char name[MAXPATHLEN];
1233 1234  
1234 1235                  if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
1235 1236                          return (EFAULT);
1236 1237  
1237 1238                  if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
1238 1239                          return (EFAULT);
1239 1240  
1240 1241                  if ((fp = getf(addchild.inac_fd)) == NULL)
1241 1242                          return (EBADF);
1242 1243  
1243 1244                  rval = inotify_add_child(state, fp->f_vnode, name);
1244 1245  
1245 1246                  releasef(addchild.inac_fd);
1246 1247                  return (rval);
1247 1248          }
1248 1249  
1249 1250          case INOTIFYIOC_RM_WATCH:
1250 1251                  return (inotify_rm_watch(state, arg));
1251 1252  
1252 1253          case INOTIFYIOC_ACTIVATE:
1253 1254                  return (inotify_activate(state, arg));
1254 1255  
1255 1256          case FIONREAD: {
1256 1257                  int32_t size;
1257 1258  
1258 1259                  mutex_enter(&state->ins_lock);
1259 1260                  size = state->ins_size;
1260 1261                  mutex_exit(&state->ins_lock);
1261 1262  
1262 1263                  if (copyout(&size, (void *)arg, sizeof (size)) != 0)
1263 1264                          return (EFAULT);
1264 1265  
1265 1266                  return (0);
1266 1267          }
1267 1268  
1268 1269          default:
1269 1270                  break;
1270 1271          }
1271 1272  
1272 1273          return (ENOTTY);
1273 1274  }
1274 1275  
/*ARGSUSED*/
/*
 * Close an inotify instance:  wake and clean up pollers, remove every watch,
 * free all queued events, synchronously reap zombies, stop the cyclic, and
 * finally unlink and free the per-instance state.
 */
static int
inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	inotify_state_t *state, **sp;
	inotify_watch_t *watch, *zombies;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);

	state = ddi_get_soft_state(inotify_softstate, minor);

	/* Notify any pollers that the instance is going away. */
	if (state->ins_pollhd.ph_list != NULL) {
		pollwakeup(&state->ins_pollhd, POLLERR);
		pollhead_clean(&state->ins_pollhd);
	}

	mutex_enter(&state->ins_lock);

	/*
	 * First, destroy all of our watches.
	 */
	while ((watch = avl_first(&state->ins_bywd)) != NULL)
		inotify_watch_remove(state, watch);

	/*
	 * And now destroy our event queue.
	 */
	while ((event = state->ins_head) != NULL) {
		state->ins_head = event->ine_next;
		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	/* Claim the zombie list so we can wait on it without the lock. */
	zombies = state->ins_zombies;
	state->ins_zombies = NULL;
	mutex_exit(&state->ins_lock);

	/*
	 * Now that our state lock is dropped, we can synchronously wait on
	 * any zombies.
	 */
	while ((watch = zombies) != NULL) {
		/* The zombie list is singly linked through inw_parent. */
		zombies = zombies->inw_parent;

		mutex_enter(&watch->inw_lock);

		/* Wait for in-flight operations to drop their holds. */
		while (watch->inw_refcnt > 1)
			cv_wait(&watch->inw_cv, &watch->inw_lock);

		inotify_watch_destroy(watch);
	}

	if (state->ins_cleaner != NULL) {
		ddi_periodic_delete(state->ins_cleaner);
		state->ins_cleaner = NULL;
	}

	mutex_enter(&inotify_lock);

	/*
	 * Remove our state from our global list, and release our hold on
	 * the cred.
	 */
	for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
		VERIFY(*sp != NULL);

	*sp = (*sp)->ins_next;
	crfree(state->ins_cred);
	vmem_destroy(state->ins_wds);

	ddi_soft_state_free(inotify_softstate, minor);
	vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);

	mutex_exit(&inotify_lock);

	return (0);
}
1351 1352  
/*ARGSUSED*/
/*
 * Driver attach:  initialize soft state, create the /dev/inotify minor node
 * and FEM template, and create the clone-minor arena.  Each failure path
 * unwinds everything initialized before it.
 */
static int
inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	mutex_enter(&inotify_lock);

	if (ddi_soft_state_init(&inotify_softstate,
	    sizeof (inotify_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
	    INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
		ddi_soft_state_fini(&inotify_softstate);
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	if (fem_create("inotify_fem",
	    inotify_vnodesrc_template, &inotify_femp) != 0) {
		cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&inotify_softstate);
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	inotify_devi = devi;

	/* Arena handing out per-open clone minors above INOTIFYMNRN_CLONE. */
	inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
	    UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);

	mutex_exit(&inotify_lock);

	return (DDI_SUCCESS);
}
1393 1394  
1394 1395  /*ARGSUSED*/
1395 1396  static int
1396 1397  inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1397 1398  {
1398 1399          switch (cmd) {
1399 1400          case DDI_DETACH:
1400 1401                  break;
1401 1402  
1402 1403          case DDI_SUSPEND:
1403 1404                  return (DDI_SUCCESS);
1404 1405  
1405 1406          default:
1406 1407                  return (DDI_FAILURE);
1407 1408          }
1408 1409  
1409 1410          mutex_enter(&inotify_lock);
1410 1411          fem_free(inotify_femp);
1411 1412          vmem_destroy(inotify_minor);
1412 1413  
1413 1414          ddi_remove_minor_node(inotify_devi, NULL);
1414 1415          inotify_devi = NULL;
1415 1416  
1416 1417          ddi_soft_state_fini(&inotify_softstate);
1417 1418          mutex_exit(&inotify_lock);
1418 1419  
1419 1420          return (DDI_SUCCESS);
1420 1421  }
1421 1422  
1422 1423  /*ARGSUSED*/
1423 1424  static int
1424 1425  inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1425 1426  {
1426 1427          int error;
1427 1428  
1428 1429          switch (infocmd) {
1429 1430          case DDI_INFO_DEVT2DEVINFO:
1430 1431                  *result = (void *)inotify_devi;
1431 1432                  error = DDI_SUCCESS;
1432 1433                  break;
1433 1434          case DDI_INFO_DEVT2INSTANCE:
1434 1435                  *result = (void *)0;
1435 1436                  error = DDI_SUCCESS;
1436 1437                  break;
1437 1438          default:
1438 1439                  error = DDI_FAILURE;
1439 1440          }
1440 1441          return (error);
1441 1442  }
1442 1443  
/*
 * Character-device entry points for /dev/inotify.
 */
static struct cb_ops inotify_cb_ops = {
	inotify_open,		/* open */
	inotify_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	inotify_read,		/* read */
	nodev,			/* write */
	inotify_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	inotify_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
1460 1461  
1461 1462  static struct dev_ops inotify_ops = {
1462 1463          DEVO_REV,               /* devo_rev */
1463 1464          0,                      /* refcnt */
1464 1465          inotify_info,           /* get_dev_info */
1465 1466          nulldev,                /* identify */
1466 1467          nulldev,                /* probe */
1467 1468          inotify_attach,         /* attach */
1468 1469          inotify_detach,         /* detach */
1469 1470          nodev,                  /* reset */
1470 1471          &inotify_cb_ops,        /* driver operations */
1471 1472          NULL,                   /* bus operations */
1472 1473          nodev,                  /* dev power */
1473 1474          ddi_quiesce_not_needed, /* quiesce */
1474 1475  };
1475 1476  
/*
 * Linkage structure naming this module and binding it to its dev_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"inotify support",	/* name of module */
	&inotify_ops,		/* driver ops */
};
1481 1482  
/*
 * Module linkage: a single driver linkage element, NULL-terminated.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
1487 1488  
/*
 * Loadable-module entry point: install the module into the system.
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
1493 1494  
/*
 * Loadable-module entry point: report module information.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
1499 1500  
/*
 * Loadable-module entry point: remove the module from the system.
 */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
  
    | 
      ↓ open down ↓ | 
    862 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX