1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 * Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
15 */
16
17 /*
18 * Support for the inotify facility, a Linux-borne facility for asynchronous
19 * notification of certain events on specified files or directories. Our
20 * implementation broadly leverages the file event monitoring facility, and
21 * would actually be quite straightforward were it not for a very serious
22 * blunder in the inotify interface: in addition to allowing for one to be
23 * notified on events on a particular file or directory, inotify also allows
24 * for one to be notified on certain events on files _within_ a watched
25 * directory -- even though those events have absolutely nothing to do with
26 * the directory itself. This leads to all sorts of madness because file
27 * operations are (of course) not undertaken on paths but rather on open
28 * files -- and the relationships between open files and the paths that resolve
29 * to those files are neither static nor isomorphic. We implement this
30 * concept by having _child watches_ when directories are watched with events
31 * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
32 * first added, and we modify those child watches dynamically as files are
33 * created, deleted, moved into or moved out of the specified directory. This
34 * mechanism works well, absent hard links. Hard links, unfortunately, break
35 * this rather badly, and the user is warned that watches on directories that
36 * have multiple directory entries referring to the same file may behave
37 * unexpectedly.
38 */
39
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/inotify.h>
43 #include <sys/fem.h>
44 #include <sys/conf.h>
45 #include <sys/stat.h>
46 #include <sys/vfs_opreg.h>
47 #include <sys/vmem.h>
48 #include <sys/avl.h>
49 #include <sys/sysmacros.h>
50 #include <sys/cyclic.h>
51 #include <sys/filio.h>
52
53 struct inotify_state;
54 struct inotify_kevent;
55
56 typedef struct inotify_watch inotify_watch_t;
57 typedef struct inotify_state inotify_state_t;
58 typedef struct inotify_kevent inotify_kevent_t;
59
/*
 * An individual watch: either a top-level watch on a vnode (indexed on the
 * state by both vnode and watch descriptor) or a child watch hanging off a
 * watched directory's inw_children tree.  Note that inw_parent doubles as
 * the zombie-list linkage once a parentless watch has been zombified.
 */
struct inotify_watch {
	kmutex_t inw_lock;			/* lock protecting ref count */
	int inw_refcnt;				/* reference count */
	uint8_t inw_zombie:1;			/* boolean: is zombie */
	uint8_t inw_fired:1;			/* boolean: fired one-shot */
	uint8_t inw_active:1;			/* boolean: watch is active */
	uint8_t inw_orphaned:1;			/* boolean: orphaned */
	kcondvar_t inw_cv;			/* condvar for zombifier */
	uint32_t inw_mask;			/* mask of watch */
	int32_t inw_wd;				/* watch descriptor */
	vnode_t *inw_vp;			/* underlying vnode */
	inotify_watch_t *inw_parent;		/* parent, if a child */
	avl_node_t inw_byvp;			/* watches by vnode */
	avl_node_t inw_bywd;			/* watches by descriptor */
	avl_tree_t inw_children;		/* children, if a parent */
	char *inw_name;				/* name, if a child */
	list_node_t inw_orphan;			/* orphan list */
	cred_t *inw_cred;			/* cred, if orphaned */
	inotify_state_t *inw_state;		/* corresponding state */
};
80
/*
 * A single queued event.  The embedded inotify_event is variable size: its
 * name (if any) immediately follows the fixed-size portion.
 */
struct inotify_kevent {
	inotify_kevent_t *ine_next;		/* next event in queue */
	struct inotify_event ine_event;		/* event (variable size) */
};
85
/*
 * Total allocated size of an inotify_kevent_t, including the variable-length
 * name carried by the embedded inotify_event.
 */
#define INOTIFY_EVENT_LENGTH(ev) \
	(sizeof (inotify_kevent_t) + (ev)->ine_event.len)
88
/*
 * Per-instance state: watch indexes (by vnode and by descriptor), the
 * bounded event queue, and the orphan/zombie lists serviced by the
 * periodic cleaner.  All fields are protected by ins_lock.
 */
struct inotify_state {
	kmutex_t ins_lock;			/* lock protecting state */
	avl_tree_t ins_byvp;			/* watches by vnode */
	avl_tree_t ins_bywd;			/* watches by descriptor */
	vmem_t *ins_wds;			/* watch identifier arena */
	int ins_maxwatches;			/* maximum number of watches */
	int ins_maxevents;			/* maximum number of events */
	int ins_nevents;			/* current # of events */
	int32_t ins_size;			/* total size of events */
	inotify_kevent_t *ins_head;		/* head of event queue */
	inotify_kevent_t *ins_tail;		/* tail of event queue */
	pollhead_t ins_pollhd;			/* poll head */
	kcondvar_t ins_cv;			/* condvar for reading */
	list_t ins_orphans;			/* orphan list */
	ddi_periodic_t ins_cleaner;		/* cyclic for cleaning */
	inotify_watch_t *ins_zombies;		/* zombie watch list */
	cred_t *ins_cred;			/* creator's credentials */
	inotify_state_t *ins_next;		/* next state on global list */
};
108
109 /*
110 * Tunables (exported read-only in lx-branded zones via /proc).
111 */
112 int inotify_maxwatches = 8192; /* max watches per instance */
113 int inotify_maxevents = 16384; /* max events */
114 int inotify_maxinstances = 128; /* max instances per user */
115
116 /*
117 * Internal global variables.
118 */
119 static kmutex_t inotify_lock; /* lock protecting state */
120 static dev_info_t *inotify_devi; /* device info */
121 static fem_t *inotify_femp; /* FEM pointer */
122 static vmem_t *inotify_minor; /* minor number arena */
123 static void *inotify_softstate; /* softstate pointer */
static inotify_state_t *inotify_state; /* global list of state */
125
126 static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
127 static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
128 static void inotify_watch_delete(inotify_watch_t *, uint32_t);
129 static void inotify_watch_remove(inotify_state_t *state,
130 inotify_watch_t *watch);
131
132 static int
133 inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
134 cred_t *cr, caller_context_t *ct)
135 {
136 inotify_watch_t *watch = vf->fa_fnode->fn_available;
137 int rval;
138
139 if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
140 inotify_watch_event(watch, flag & FWRITE ?
141 IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
142 }
143
144 return (rval);
145 }
146
147 static int
148 inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
149 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
150 vsecattr_t *vsecp)
151 {
152 inotify_watch_t *watch = vf->fa_fnode->fn_available;
153 int rval;
154
155 if ((rval = vnext_create(vf, name, vap, excl, mode,
156 vpp, cr, flag, ct, vsecp)) == 0) {
157 inotify_watch_insert(watch, *vpp, name);
158 inotify_watch_event(watch, IN_CREATE, name);
159 }
160
161 return (rval);
162 }
163
164 static int
165 inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
166 caller_context_t *ct, int flags)
167 {
168 inotify_watch_t *watch = vf->fa_fnode->fn_available;
169 int rval;
170
171 if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
172 inotify_watch_insert(watch, svp, tnm);
173 inotify_watch_event(watch, IN_CREATE, tnm);
174 }
175
176 return (rval);
177 }
178
179 static int
180 inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
181 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
182 {
183 inotify_watch_t *watch = vf->fa_fnode->fn_available;
184 int rval;
185
186 if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
187 ct, flags, vsecp)) == 0) {
188 inotify_watch_insert(watch, *vpp, name);
189 inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
190 }
191
192 return (rval);
193 }
194
195 static int
196 inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
197 {
198 inotify_watch_t *watch = vf->fa_fnode->fn_available;
199 int rval;
200
201 if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
202 inotify_watch_event(watch, IN_OPEN, NULL);
203
204 return (rval);
205 }
206
207 static int
208 inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
209 caller_context_t *ct)
210 {
211 inotify_watch_t *watch = vf->fa_fnode->fn_available;
212 int rval = vnext_read(vf, uiop, ioflag, cr, ct);
213 inotify_watch_event(watch, IN_ACCESS, NULL);
214
215 return (rval);
216 }
217
218 static int
219 inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
220 caller_context_t *ct, int flags)
221 {
222 inotify_watch_t *watch = vf->fa_fnode->fn_available;
223 int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
224 inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
225
226 return (rval);
227 }
228
229 int
230 inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
231 int flags)
232 {
233 inotify_watch_t *watch = vf->fa_fnode->fn_available;
234 int rval;
235
236 if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
237 inotify_watch_event(watch, IN_DELETE, nm);
238
239 return (rval);
240 }
241
242 int
243 inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
244 caller_context_t *ct, int flags)
245 {
246 inotify_watch_t *watch = vf->fa_fnode->fn_available;
247 int rval;
248
249 if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
250 inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
251
252 return (rval);
253 }
254
255 static int
256 inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
257 caller_context_t *ct)
258 {
259 inotify_watch_t *watch = vf->fa_fnode->fn_available;
260 int rval;
261
262 if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
263 inotify_watch_event(watch, IN_ATTRIB, NULL);
264
265 return (rval);
266 }
267
268 static int
269 inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
270 caller_context_t *ct)
271 {
272 inotify_watch_t *watch = vf->fa_fnode->fn_available;
273 int rval = vnext_write(vf, uiop, ioflag, cr, ct);
274 inotify_watch_event(watch, IN_MODIFY, NULL);
275
276 return (rval);
277 }
278
/*
 * Translate vnevent(9E) notifications -- renames, removals, link-count
 * changes -- into inotify events.  The vnevent is always passed on to the
 * next monitor in the stack.
 */
static int
inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	inotify_watch_t *watch = vf->fa_fnode->fn_available;

	switch (vnevent) {
	case VE_RENAME_SRC:
		/* The watched file itself was renamed away. */
		inotify_watch_event(watch, IN_MOVE_SELF, NULL);
		inotify_watch_delete(watch, IN_MOVE_SELF);
		break;
	case VE_REMOVE:
		/*
		 * Linux will apparently fire an IN_ATTRIB event when the link
		 * count changes (including when it drops to 0 on a remove).
		 * This is merely somewhat odd; what is amazing is that this
		 * IN_ATTRIB event is not visible on an inotify watch on the
		 * parent directory.  (IN_ATTRIB events are normally sent to
		 * watches on the parent directory).  While it's hard to
		 * believe that this constitutes desired semantics, ltp
		 * unfortunately tests this case (if implicitly); in the name
		 * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
		 * explicitly watching the file that has been removed.
		 */
		if (watch->inw_parent == NULL)
			inotify_watch_event(watch, IN_ATTRIB, NULL);

		/*FALLTHROUGH*/
	case VE_RENAME_DEST:
		/* The watched file was removed (or renamed over). */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_RMDIR:
		/*
		 * It seems that IN_ISDIR should really be OR'd in here, but
		 * Linux doesn't seem to do that in this case; for the sake of
		 * bug-for-bug compatibility, we don't do it either.
		 */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_CREATE:
	case VE_TRUNCATE:
	case VE_RESIZE:
		inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
		break;
	case VE_LINK:
		/* Link count changed; see the VE_REMOVE comment above. */
		inotify_watch_event(watch, IN_ATTRIB, NULL);
		break;
	case VE_RENAME_SRC_DIR:
		/* A child of the watched directory was renamed away. */
		inotify_watch_event(watch, IN_MOVED_FROM, name);
		break;
	case VE_RENAME_DEST_DIR:
		/* A file was renamed into the watched directory. */
		if (name == NULL)
			name = dvp->v_path;

		inotify_watch_insert(watch, dvp, name);
		inotify_watch_event(watch, IN_MOVED_TO, name);
		break;
	case VE_SUPPORT:
	case VE_MOUNTEDOVER:
	case VE_PRE_RENAME_SRC:
	case VE_PRE_RENAME_DEST:
	case VE_PRE_RENAME_DEST_DIR:
		/* No inotify significance; just pass these through. */
		break;
	}

	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
}
348
/*
 * FEM monitor template: the vnode operations interposed upon to translate
 * VOP activity on a watched vnode into inotify events.
 */
const fs_operation_def_t inotify_vnodesrc_template[] = {
	VOPNAME_CLOSE, { .femop_close = inotify_fop_close },
	VOPNAME_CREATE, { .femop_create = inotify_fop_create },
	VOPNAME_LINK, { .femop_link = inotify_fop_link },
	VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir },
	VOPNAME_OPEN, { .femop_open = inotify_fop_open },
	VOPNAME_READ, { .femop_read = inotify_fop_read },
	VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir },
	VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove },
	VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir },
	VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr },
	VOPNAME_WRITE, { .femop_write = inotify_fop_write },
	VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent },
	NULL, NULL
};
364
365 static int
366 inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
367 {
368 if (lhs->inw_wd < rhs->inw_wd)
369 return (-1);
370
371 if (lhs->inw_wd > rhs->inw_wd)
372 return (1);
373
374 return (0);
375 }
376
377 static int
378 inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
379 {
380 uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
381
382 if (lvp < rvp)
383 return (-1);
384
385 if (lvp > rvp)
386 return (1);
387
388 return (0);
389 }
390
/*
 * Take an additional reference on a watch.  The caller must already hold
 * a reference -- the count may never be raised from zero.
 */
static void
inotify_watch_hold(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 0);
	watch->inw_refcnt++;
	mutex_exit(&watch->inw_lock);
}
399
/*
 * Drop a reference on a watch.  The count may not reach zero here -- the
 * final reference is disposed of via inotify_watch_destroy().  When a
 * zombified watch drops to its last reference, signal the zombifier.
 */
static void
inotify_watch_release(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 1);

	if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
		/*
		 * We're down to our last reference; kick anyone that might be
		 * waiting.
		 */
		cv_signal(&watch->inw_cv);
	}

	mutex_exit(&watch->inw_lock);
}
416
417 static void
418 inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
419 {
420 inotify_kevent_t *event, *tail;
421 inotify_state_t *state = watch->inw_state;
422 uint32_t wd = watch->inw_wd, cookie = 0, len;
423 boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
424 inotify_watch_t *source = watch;
425
426 if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
427 return;
428
429 if (watch->inw_parent != NULL) {
430 /*
431 * This is an event on the child; if this isn't a valid child
432 * event, return. Otherwise, we move our watch to be our
433 * parent (which we know is around because we have a hold on
434 * it) and continue.
435 */
436 if (!(mask & IN_CHILD_EVENTS))
437 return;
438
439 name = watch->inw_name;
440 watch = watch->inw_parent;
441 wd = watch->inw_wd;
442 }
443
444 if (!removal) {
445 mutex_enter(&state->ins_lock);
446
447 if (watch->inw_zombie ||
448 watch->inw_fired || !watch->inw_active) {
449 mutex_exit(&state->ins_lock);
450 return;
451 }
452 } else {
453 if (!watch->inw_active)
454 return;
455
456 VERIFY(MUTEX_HELD(&state->ins_lock));
457 }
458
459 /*
460 * If this is an operation on a directory and it's a child event
461 * (event if it's not on a child), we specify IN_ISDIR.
462 */
463 if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
464 mask |= IN_ISDIR;
465
466 if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
467 cookie = (uint32_t)curthread->t_did;
468
469 if (state->ins_nevents >= state->ins_maxevents) {
470 /*
471 * We're at our maximum number of events -- turn our event
472 * into an IN_Q_OVERFLOW event, which will be coalesced if
473 * it's already the tail event.
474 */
475 mask = IN_Q_OVERFLOW;
476 wd = (uint32_t)-1;
477 cookie = 0;
478 len = 0;
479 }
480
481 if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
482 tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
483 ((tail->ine_event.len == 0 && len == 0) ||
484 (name != NULL && tail->ine_event.len != 0 &&
485 strcmp(tail->ine_event.name, name) == 0))) {
486 /*
487 * This is an implicitly coalesced event; we're done.
488 */
489 if (!removal)
490 mutex_exit(&state->ins_lock);
491 return;
492 }
493
494 if (name != NULL) {
495 len = strlen(name) + 1;
496 len = roundup(len, sizeof (struct inotify_event));
497 } else {
498 len = 0;
499 }
500
501 event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
502 event->ine_event.wd = wd;
503 event->ine_event.mask = (uint32_t)mask;
504 event->ine_event.cookie = cookie;
505 event->ine_event.len = len;
506
507 if (name != NULL)
508 (void) strcpy(event->ine_event.name, name);
509
510 if (tail != NULL) {
511 tail->ine_next = event;
512 } else {
513 VERIFY(state->ins_head == NULL);
514 state->ins_head = event;
515 cv_broadcast(&state->ins_cv);
516 }
517
518 state->ins_tail = event;
519 state->ins_nevents++;
520 state->ins_size += sizeof (event->ine_event) + len;
521
522 if (removal)
523 return;
524
525 if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
526 /*
527 * If this is a one-shot, we need to remove the watch. (Note
528 * that this will recurse back into inotify_watch_event() to
529 * fire the IN_IGNORED event -- but with "removal" set.)
530 */
531 watch->inw_fired = 1;
532 inotify_watch_remove(state, watch);
533 }
534
535 mutex_exit(&state->ins_lock);
536 pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
537 }
538
/*
 * Destroy a watch.  By the time we're in here, the watch must have exactly
 * one reference and its inw_lock must be held.  The watch -- lock
 * included -- is freed here, so the caller must not touch it afterward
 * (and in particular must not mutex_exit() the now-freed lock).
 */
static void
inotify_watch_destroy(inotify_watch_t *watch)
{
	VERIFY(MUTEX_HELD(&watch->inw_lock));

	if (watch->inw_name != NULL)
		kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);

	kmem_free(watch, sizeof (inotify_watch_t));
}
553
/*
 * Zombify a watch.  By the time we come in here, it must be true that the
 * watch has already been fem_uninstall()'d -- the only reference should be
 * in the state's data structure.  If we can get away with freeing it, we'll
 * do that -- but if the reference count is greater than one due to an active
 * vnode operation, we'll put this watch on the zombie list on the state
 * structure.
 */
static void
inotify_watch_zombify(inotify_watch_t *watch)
{
	inotify_state_t *state = watch->inw_state;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(!watch->inw_zombie);

	watch->inw_zombie = 1;

	if (watch->inw_parent != NULL) {
		/*
		 * A child watch holds its parent; drop that hold now.
		 */
		inotify_watch_release(watch->inw_parent);
	} else {
		/*
		 * A top-level watch: pull it out of both indexes and return
		 * its watch descriptor to the arena.
		 */
		avl_remove(&state->ins_byvp, watch);
		avl_remove(&state->ins_bywd, watch);
		vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
		watch->inw_wd = -1;
	}

	mutex_enter(&watch->inw_lock);

	if (watch->inw_refcnt == 1) {
		/*
		 * There are no operations in flight and there is no way
		 * for anyone to discover this watch -- we can destroy it.
		 * (The destroy frees the watch, lock included, so there is
		 * deliberately no mutex_exit() on this path.)
		 */
		inotify_watch_destroy(watch);
	} else {
		/*
		 * There are operations in flight; we will need to enqueue
		 * this for later destruction.  inw_parent is reused here as
		 * the zombie-list linkage, walked by inotify_clean().
		 */
		watch->inw_parent = state->ins_zombies;
		state->ins_zombies = watch;
		mutex_exit(&watch->inw_lock);
	}
}
599
600 static inotify_watch_t *
601 inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
602 const char *name, vnode_t *vp, uint32_t mask)
603 {
604 inotify_watch_t *watch;
605 int err;
606
607 VERIFY(MUTEX_HELD(&state->ins_lock));
608
609 watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
610
611 watch->inw_vp = vp;
612 watch->inw_mask = mask;
613 watch->inw_state = state;
614 watch->inw_refcnt = 1;
615
616 if (parent == NULL) {
617 watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
618 1, VM_BESTFIT | VM_SLEEP);
619 avl_add(&state->ins_byvp, watch);
620 avl_add(&state->ins_bywd, watch);
621
622 avl_create(&watch->inw_children,
623 (int(*)(const void *, const void *))inotify_watch_cmpvp,
624 sizeof (inotify_watch_t),
625 offsetof(inotify_watch_t, inw_byvp));
626 } else {
627 VERIFY(name != NULL);
628 inotify_watch_hold(parent);
629 watch->inw_mask &= IN_CHILD_EVENTS;
630 watch->inw_parent = parent;
631 watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
632 strcpy(watch->inw_name, name);
633
634 avl_add(&parent->inw_children, watch);
635 }
636
637 /*
638 * Add our monitor to the vnode. We must not have the watch lock held
639 * when we do this, as it will immediately hold our watch.
640 */
641 err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
642 (void (*)(void *))inotify_watch_hold,
643 (void (*)(void *))inotify_watch_release);
644
645 VERIFY(err == 0);
646
647 return (watch);
648 }
649
/*
 * Remove a (non-child) watch.  This is called from either synchronous context
 * via inotify_rm_watch() or monitor context via either a vnevent or a
 * one-shot.  The state lock must be held; the watch and all of its children
 * are uninstalled from their vnodes and zombified.
 */
static void
inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
{
	inotify_watch_t *child;
	int err;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(watch->inw_parent == NULL);

	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	/*
	 * If we have children, we're going to remove them all and set them
	 * all to be zombies.
	 */
	while ((child = avl_first(&watch->inw_children)) != NULL) {
		VERIFY(child->inw_parent == watch);
		avl_remove(&watch->inw_children, child);

		err = fem_uninstall(child->inw_vp, inotify_femp, child);
		VERIFY(err == 0);

		/*
		 * If this child watch has been orphaned, remove it from the
		 * state's list of orphans.
		 */
		if (child->inw_orphaned) {
			list_remove(&state->ins_orphans, child);
			crfree(child->inw_cred);
		}

		VN_RELE(child->inw_vp);

		/*
		 * We're down (or should be down) to a single reference to
		 * this child watch; it's safe to zombify it.
		 */
		inotify_watch_zombify(child);
	}

	/*
	 * IN_REMOVAL tells inotify_watch_event() that we already hold the
	 * state lock and that the watch is being torn down, so the
	 * IN_IGNORED event is queued despite the teardown in progress.
	 */
	inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- we know that the only reference
	 * can come from operations in flight.
	 */
	inotify_watch_zombify(watch);
}
705
/*
 * Delete a watch.  Should only be called from VOP context, in response to a
 * rename or removal vnevent; event identifies the motivating inotify event
 * (IN_DELETE_SELF or IN_MOVE_SELF), which determines whether a child watch
 * is merely orphaned or torn down outright.
 */
static void
inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
{
	inotify_state_t *state = watch->inw_state;
	inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
	int err;

	if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
		return;

	mutex_enter(&state->ins_lock);

	if (watch->inw_zombie) {
		mutex_exit(&state->ins_lock);
		return;
	}

	if ((parent = watch->inw_parent) == NULL) {
		if (event == IN_DELETE_SELF) {
			/*
			 * If we're here because we're being deleted and we
			 * are not a child watch, we need to delete the entire
			 * watch, children and all.
			 */
			inotify_watch_remove(state, watch);
		}

		mutex_exit(&state->ins_lock);
		return;
	} else {
		if (event == IN_DELETE_SELF &&
		    !(parent->inw_mask & IN_EXCL_UNLINK)) {
			/*
			 * This is a child watch for a file that is being
			 * removed and IN_EXCL_UNLINK has not been specified;
			 * indicate that it is orphaned and add it to the list
			 * of orphans.  (This list will be checked by the
			 * cleaning cyclic to determine when the watch has
			 * become the only hold on the vnode, at which point
			 * the watch can be zombified.)  Note that we check
			 * if the watch is orphaned before we orphan it: hard
			 * links make it possible for VE_REMOVE to be called
			 * multiple times on the same vnode. (!)
			 */
			if (!watch->inw_orphaned) {
				watch->inw_orphaned = 1;
				watch->inw_cred = CRED();
				crhold(watch->inw_cred);
				list_insert_head(&state->ins_orphans, watch);
			}

			mutex_exit(&state->ins_lock);
			return;
		}

		if (watch->inw_orphaned) {
			/*
			 * If we're here, a file was orphaned and then later
			 * moved -- which almost certainly means that hard
			 * links are on the scene.  We choose the orphan over
			 * the move because we don't want to spuriously
			 * drop events if we can avoid it.
			 */
			crfree(watch->inw_cred);
			list_remove(&state->ins_orphans, watch);
		}
	}

	if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
		/*
		 * This watch has already been deleted from the parent.
		 */
		mutex_exit(&state->ins_lock);
		return;
	}

	avl_remove(&parent->inw_children, watch);
	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- which won't actually delete
	 * it as we know that the reference count is greater than 1.
	 */
	inotify_watch_zombify(watch);
	mutex_exit(&state->ins_lock);
}
798
799 /*
800 * Insert a new child watch. Should only be called from VOP context when
801 * a child is created in a watched directory.
802 */
803 static void
804 inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
805 {
806 inotify_state_t *state = watch->inw_state;
807 inotify_watch_t cmp = { .inw_vp = vp };
808
809 if (!(watch->inw_mask & IN_CHILD_EVENTS))
810 return;
811
812 mutex_enter(&state->ins_lock);
813
814 if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
815 mutex_exit(&state->ins_lock);
816 return;
817 }
818
819 if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
820 mutex_exit(&state->ins_lock);
821 return;
822 }
823
824 VN_HOLD(vp);
825 watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
826 VERIFY(watch != NULL);
827
828 mutex_exit(&state->ins_lock);
829 }
830
831
832 static int
833 inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
834 int32_t *wdp)
835 {
836 inotify_watch_t *watch, cmp = { .inw_vp = vp };
837 uint32_t set;
838
839 set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;
840
841 /*
842 * Lookup our vnode to determine if we already have a watch on it.
843 */
844 mutex_enter(&state->ins_lock);
845
846 if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
847 /*
848 * We don't have this watch; allocate a new one, provided that
849 * we have fewer than our limit.
850 */
851 if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
852 mutex_exit(&state->ins_lock);
853 return (ENOSPC);
854 }
855
856 VN_HOLD(vp);
857 watch = inotify_watch_add(state, NULL, NULL, vp, set);
858 *wdp = watch->inw_wd;
859 mutex_exit(&state->ins_lock);
860
861 return (0);
862 }
863
864 VERIFY(!watch->inw_zombie);
865
866 if (!(mask & IN_MASK_ADD)) {
867 /*
868 * Note that if we're resetting our event mask and we're
869 * transitioning from an event mask that includes child events
870 * to one that doesn't, there will be potentially some stale
871 * child watches. This is basically fine: they won't fire,
872 * and they will correctly be removed when the watch is
873 * removed.
874 */
875 watch->inw_mask = 0;
876 }
877
878 watch->inw_mask |= set;
879
880 *wdp = watch->inw_wd;
881
882 mutex_exit(&state->ins_lock);
883
884 return (0);
885 }
886
/*
 * Add a child watch on the entry named name in the watched directory vp.
 * Note the VN_RELE() of the looked-up child vnode on every early-return
 * path; on success, the hold from lookupnameat() is retained by the child
 * watch (and released when that watch is torn down).
 */
static int
inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	vnode_t *cvp;
	int err;

	/*
	 * Verify that the specified child doesn't have a directory component
	 * within it.
	 */
	if (strchr(name, '/') != NULL)
		return (EINVAL);

	/*
	 * Lookup the underlying file.  Note that this will succeed even if
	 * we don't have permissions to actually read the file.
	 */
	if ((err = lookupnameat(name,
	    UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
		return (err);
	}

	/*
	 * Use our vnode to find our watch, and then add our child watch to it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * This is unexpected -- it means that we don't have the
		 * watch that we thought we had.
		 */
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (ENXIO);
	}

	/*
	 * Now lookup the child vnode in the watch; we'll only add it if it
	 * isn't already there.
	 */
	cmp.inw_vp = cvp;

	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (0);
	}

	watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
	VERIFY(watch != NULL);
	mutex_exit(&state->ins_lock);

	return (0);
}
943
944 static int
945 inotify_rm_watch(inotify_state_t *state, int32_t wd)
946 {
947 inotify_watch_t *watch, cmp = { .inw_wd = wd };
948
949 mutex_enter(&state->ins_lock);
950
951 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
952 mutex_exit(&state->ins_lock);
953 return (EINVAL);
954 }
955
956 inotify_watch_remove(state, watch);
957 mutex_exit(&state->ins_lock);
958
959 return (0);
960 }
961
962 static int
963 inotify_activate(inotify_state_t *state, int32_t wd)
964 {
965 inotify_watch_t *watch, cmp = { .inw_wd = wd };
966
967 mutex_enter(&state->ins_lock);
968
969 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
970 mutex_exit(&state->ins_lock);
971 return (EINVAL);
972 }
973
974 watch->inw_active = 1;
975
976 mutex_exit(&state->ins_lock);
977
978 return (0);
979 }
980
/*
 * Called periodically as a cyclic to process the orphans and zombies.
 * (Registered with a one-second period in inotify_open().)
 */
static void
inotify_clean(void *arg)
{
	inotify_state_t *state = arg;
	inotify_watch_t *watch, *parent, *next, **prev;
	cred_t *savecred;
	int err;

	mutex_enter(&state->ins_lock);

	for (watch = list_head(&state->ins_orphans);
	    watch != NULL; watch = next) {
		next = list_next(&state->ins_orphans, watch);

		VERIFY(!watch->inw_zombie);
		VERIFY((parent = watch->inw_parent) != NULL);

		/*
		 * Leave the orphan alone until our hold is the only hold on
		 * its vnode.
		 */
		if (watch->inw_vp->v_count > 1)
			continue;

		avl_remove(&parent->inw_children, watch);
		err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
		VERIFY(err == 0);

		list_remove(&state->ins_orphans, watch);

		/*
		 * For purposes of releasing the vnode, we need to switch our
		 * cred to be the cred of the orphaning thread (which we held
		 * at the time this watch was orphaned).
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = watch->inw_cred;
		VN_RELE(watch->inw_vp);
		crfree(watch->inw_cred);
		curthread->t_cred = savecred;

		inotify_watch_zombify(watch);
	}

	/*
	 * Walk the zombie list (linked through inw_parent), destroying any
	 * zombie whose in-flight operations have drained.
	 */
	prev = &state->ins_zombies;

	while ((watch = *prev) != NULL) {
		mutex_enter(&watch->inw_lock);

		if (watch->inw_refcnt == 1) {
			/*
			 * The destroy frees the watch -- lock included -- so
			 * there is no mutex_exit() on this path.
			 */
			*prev = watch->inw_parent;
			inotify_watch_destroy(watch);
			continue;
		}

		prev = &watch->inw_parent;
		mutex_exit(&watch->inw_lock);
	}

	mutex_exit(&state->ins_lock);
}
1041
1042 /*ARGSUSED*/
1043 static int
1044 inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
1045 {
1046 inotify_state_t *state;
1047 major_t major = getemajor(*devp);
1048 minor_t minor = getminor(*devp);
1049 int instances = 0;
1050 char c[64];
1051
1052 if (minor != INOTIFYMNRN_INOTIFY)
1053 return (ENXIO);
1054
1055 mutex_enter(&inotify_lock);
1056
1057 for (state = inotify_state; state != NULL; state = state->ins_next) {
1058 if (state->ins_cred == cred_p)
1059 instances++;
1060 }
1061
1062 if (instances >= inotify_maxinstances) {
1063 mutex_exit(&inotify_lock);
1064 return (EMFILE);
1065 }
1066
1067 minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
1068 VM_BESTFIT | VM_SLEEP);
1069
1070 if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
1071 vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1072 mutex_exit(&inotify_lock);
1073 return (NULL);
1074 }
1075
1076 state = ddi_get_soft_state(inotify_softstate, minor);
1077 *devp = makedevice(major, minor);
1078
1079 crhold(cred_p);
1080 state->ins_cred = cred_p;
1081 state->ins_next = inotify_state;
1082 inotify_state = state;
1083
1084 (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
1085 state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
1086 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
1087
1088 avl_create(&state->ins_bywd,
1089 (int(*)(const void *, const void *))inotify_watch_cmpwd,
1090 sizeof (inotify_watch_t),
1091 offsetof(inotify_watch_t, inw_bywd));
1092
1093 avl_create(&state->ins_byvp,
1094 (int(*)(const void *, const void *))inotify_watch_cmpvp,
1095 sizeof (inotify_watch_t),
1096 offsetof(inotify_watch_t, inw_byvp));
1097
1098 list_create(&state->ins_orphans, sizeof (inotify_watch_t),
1099 offsetof(inotify_watch_t, inw_orphan));
1100
1101 state->ins_maxwatches = inotify_maxwatches;
1102 state->ins_maxevents = inotify_maxevents;
1103
1104 mutex_exit(&inotify_lock);
1105
1106 state->ins_cleaner = ddi_periodic_add(inotify_clean,
1107 state, NANOSEC, DDI_IPL_0);
1108
1109 return (0);
1110 }
1111
/*ARGSUSED*/
static int
inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	inotify_state_t *state;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);
	int err = 0, nevents = 0;
	size_t len;

	state = ddi_get_soft_state(inotify_softstate, minor);

	mutex_enter(&state->ins_lock);

	/*
	 * Block until at least one event is queued, honoring non-blocking
	 * mode (EAGAIN) and interruption by a signal (EINTR).
	 */
	while (state->ins_head == NULL) {
		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
			mutex_exit(&state->ins_lock);
			return (EAGAIN);
		}

		if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
			mutex_exit(&state->ins_lock);
			return (EINTR);
		}
	}

	/*
	 * We have events and we have our lock; return as many as we can.
	 */
	while ((event = state->ins_head) != NULL) {
		/*
		 * An event on the queue is the fixed header plus its
		 * variable-length name payload.
		 */
		len = sizeof (event->ine_event) + event->ine_event.len;

		if (uio->uio_resid < len) {
			/*
			 * The next event won't fit.  If nothing has been
			 * copied out yet, the caller's buffer is too small
			 * for even one event, which is EINVAL (per inotify
			 * semantics) rather than a short read.
			 */
			if (nevents == 0)
				err = EINVAL;
			break;
		}

		nevents++;

		/*
		 * If uiomove() fails, the event has not yet been unlinked,
		 * so it remains at the head of the queue.
		 */
		if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
			break;

		VERIFY(state->ins_nevents > 0);
		state->ins_nevents--;

		VERIFY(state->ins_size > 0);
		state->ins_size -= len;

		/*
		 * Unlink the event; if it was the last one, the tail
		 * pointer must be cleared as well.
		 */
		if ((state->ins_head = event->ine_next) == NULL) {
			VERIFY(event == state->ins_tail);
			VERIFY(state->ins_nevents == 0);
			state->ins_tail = NULL;
		}

		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	mutex_exit(&state->ins_lock);

	return (err);
}
1174
1175 /*ARGSUSED*/
1176 static int
1177 inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
1178 struct pollhead **phpp)
1179 {
1180 inotify_state_t *state;
1181 minor_t minor = getminor(dev);
1182
1183 state = ddi_get_soft_state(inotify_softstate, minor);
1184
1185 mutex_enter(&state->ins_lock);
1186
1187 if (state->ins_head != NULL) {
1188 *reventsp = events & (POLLRDNORM | POLLIN);
1189 } else {
1190 *reventsp = 0;
1191
1192 if (!anyyet)
1193 *phpp = &state->ins_pollhd;
1194 }
1195
1196 mutex_exit(&state->ins_lock);
1197
1198 return (0);
1199 }
1200
1201 /*ARGSUSED*/
1202 static int
1203 inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
1204 {
1205 inotify_state_t *state;
1206 minor_t minor = getminor(dev);
1207 file_t *fp;
1208 int rval;
1209
1210 state = ddi_get_soft_state(inotify_softstate, minor);
1211
1212 switch (cmd) {
1213 case INOTIFYIOC_ADD_WATCH: {
1214 inotify_addwatch_t addwatch;
1215 file_t *fp;
1216
1217 if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
1218 return (EFAULT);
1219
1220 if ((fp = getf(addwatch.inaw_fd)) == NULL)
1221 return (EBADF);
1222
1223 rval = inotify_add_watch(state, fp->f_vnode,
1224 addwatch.inaw_mask, rv);
1225
1226 releasef(addwatch.inaw_fd);
1227 return (rval);
1228 }
1229
1230 case INOTIFYIOC_ADD_CHILD: {
1231 inotify_addchild_t addchild;
1232 char name[MAXPATHLEN];
1233
1234 if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
1235 return (EFAULT);
1236
1237 if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
1238 return (EFAULT);
1239
1240 if ((fp = getf(addchild.inac_fd)) == NULL)
1241 return (EBADF);
1242
1243 rval = inotify_add_child(state, fp->f_vnode, name);
1244
1245 releasef(addchild.inac_fd);
1246 return (rval);
1247 }
1248
1249 case INOTIFYIOC_RM_WATCH:
1250 return (inotify_rm_watch(state, arg));
1251
1252 case INOTIFYIOC_ACTIVATE:
1253 return (inotify_activate(state, arg));
1254
1255 case FIONREAD: {
1256 int32_t size;
1257
1258 mutex_enter(&state->ins_lock);
1259 size = state->ins_size;
1260 mutex_exit(&state->ins_lock);
1261
1262 if (copyout(&size, (void *)arg, sizeof (size)) != 0)
1263 return (EFAULT);
1264
1265 return (0);
1266 }
1267
1268 default:
1269 break;
1270 }
1271
1272 return (ENOTTY);
1273 }
1274
/*ARGSUSED*/
static int
inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	inotify_state_t *state, **sp;
	inotify_watch_t *watch, *zombies;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);

	state = ddi_get_soft_state(inotify_softstate, minor);

	/*
	 * Wake up and clear out any pollers before tearing down state.
	 */
	if (state->ins_pollhd.ph_list != NULL) {
		pollwakeup(&state->ins_pollhd, POLLERR);
		pollhead_clean(&state->ins_pollhd);
	}

	mutex_enter(&state->ins_lock);

	/*
	 * First, destroy all of our watches.
	 */
	while ((watch = avl_first(&state->ins_bywd)) != NULL)
		inotify_watch_remove(state, watch);

	/*
	 * And now destroy our event queue.
	 */
	while ((event = state->ins_head) != NULL) {
		state->ins_head = event->ine_next;
		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	/*
	 * Detach the zombie list while still under ins_lock; the blocking
	 * waits below must be performed with the lock dropped.
	 */
	zombies = state->ins_zombies;
	state->ins_zombies = NULL;
	mutex_exit(&state->ins_lock);

	/*
	 * Now that our state lock is dropped, we can synchronously wait on
	 * any zombies.
	 */
	while ((watch = zombies) != NULL) {
		/*
		 * The zombie list is chained through inw_parent.
		 */
		zombies = zombies->inw_parent;

		mutex_enter(&watch->inw_lock);

		/*
		 * Wait until ours is the last reference, then destroy.
		 */
		while (watch->inw_refcnt > 1)
			cv_wait(&watch->inw_cv, &watch->inw_lock);

		inotify_watch_destroy(watch);
	}

	/*
	 * Stop the periodic cleaner installed by inotify_open().
	 */
	if (state->ins_cleaner != NULL) {
		ddi_periodic_delete(state->ins_cleaner);
		state->ins_cleaner = NULL;
	}

	mutex_enter(&inotify_lock);

	/*
	 * Remove our state from our global list, and release our hold on
	 * the cred.
	 */
	for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
		VERIFY(*sp != NULL);

	*sp = (*sp)->ins_next;
	crfree(state->ins_cred);
	vmem_destroy(state->ins_wds);

	ddi_soft_state_free(inotify_softstate, minor);
	vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);

	mutex_exit(&inotify_lock);

	return (0);
}
1351
1352 /*ARGSUSED*/
1353 static int
1354 inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1355 {
1356 mutex_enter(&inotify_lock);
1357
1358 if (ddi_soft_state_init(&inotify_softstate,
1359 sizeof (inotify_state_t), 0) != 0) {
1360 cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
1361 mutex_exit(&inotify_lock);
1362 return (DDI_FAILURE);
1363 }
1364
1365 if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
1366 INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
1367 cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
1368 ddi_soft_state_fini(&inotify_softstate);
1369 mutex_exit(&inotify_lock);
1370 return (DDI_FAILURE);
1371 }
1372
1373 if (fem_create("inotify_fem",
1374 inotify_vnodesrc_template, &inotify_femp) != 0) {
1375 cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
1376 ddi_remove_minor_node(devi, NULL);
1377 ddi_soft_state_fini(&inotify_softstate);
1378 mutex_exit(&inotify_lock);
1379 return (DDI_FAILURE);
1380 }
1381
1382 ddi_report_dev(devi);
1383 inotify_devi = devi;
1384
1385 inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
1386 UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
1387 VM_SLEEP | VMC_IDENTIFIER);
1388
1389 mutex_exit(&inotify_lock);
1390
1391 return (DDI_SUCCESS);
1392 }
1393
1394 /*ARGSUSED*/
1395 static int
1396 inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1397 {
1398 switch (cmd) {
1399 case DDI_DETACH:
1400 break;
1401
1402 case DDI_SUSPEND:
1403 return (DDI_SUCCESS);
1404
1405 default:
1406 return (DDI_FAILURE);
1407 }
1408
1409 mutex_enter(&inotify_lock);
1410 fem_free(inotify_femp);
1411 vmem_destroy(inotify_minor);
1412
1413 ddi_remove_minor_node(inotify_devi, NULL);
1414 inotify_devi = NULL;
1415
1416 ddi_soft_state_fini(&inotify_softstate);
1417 mutex_exit(&inotify_lock);
1418
1419 return (DDI_SUCCESS);
1420 }
1421
1422 /*ARGSUSED*/
1423 static int
1424 inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1425 {
1426 int error;
1427
1428 switch (infocmd) {
1429 case DDI_INFO_DEVT2DEVINFO:
1430 *result = (void *)inotify_devi;
1431 error = DDI_SUCCESS;
1432 break;
1433 case DDI_INFO_DEVT2INSTANCE:
1434 *result = (void *)0;
1435 error = DDI_SUCCESS;
1436 break;
1437 default:
1438 error = DDI_FAILURE;
1439 }
1440 return (error);
1441 }
1442
/*
 * Character device entry points for /dev/inotify.  Unsupported entry
 * points are nulldev (succeed, no-op) or nodev (fail with ENXIO).
 */
static struct cb_ops inotify_cb_ops = {
	inotify_open,		/* open */
	inotify_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	inotify_read,		/* read */
	nodev,			/* write */
	inotify_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	inotify_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
1460
/*
 * Device operations vector; wires the attach/detach/getinfo entry points
 * and the cb_ops table above into the DDI framework.
 */
static struct dev_ops inotify_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	inotify_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	inotify_attach,		/* attach */
	inotify_detach,		/* detach */
	nodev,			/* reset */
	&inotify_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};
1475
/*
 * Loadable-module declaration for this pseudo device driver.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"inotify support",	/* name of module */
	&inotify_ops,		/* driver ops */
};
1481
/*
 * Module linkage handed to mod_install()/mod_remove()/mod_info() below.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
1487
1488 int
1489 _init(void)
1490 {
1491 return (mod_install(&modlinkage));
1492 }
1493
1494 int
1495 _info(struct modinfo *modinfop)
1496 {
1497 return (mod_info(&modlinkage, modinfop));
1498 }
1499
1500 int
1501 _fini(void)
1502 {
1503 return (mod_remove(&modlinkage));
1504 }