1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 * Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
15 */
16
17 /*
18 * Support for the inotify facility, a Linux-borne facility for asynchronous
19 * notification of certain events on specified files or directories. Our
20 * implementation broadly leverages the file event monitoring facility, and
21 * would actually be quite straightforward were it not for a very serious
22 * blunder in the inotify interface: in addition to allowing for one to be
23 * notified on events on a particular file or directory, inotify also allows
24 * for one to be notified on certain events on files _within_ a watched
25 * directory -- even though those events have absolutely nothing to do with
26 * the directory itself. This leads to all sorts of madness because file
27 * operations are (of course) not undertaken on paths but rather on open
28 * files -- and the relationships between open files and the paths that resolve
29 * to those files are neither static nor isomorphic. We implement this
30 * concept by having _child watches_ when directories are watched with events
31 * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
32 * first added, and we modify those child watches dynamically as files are
33 * created, deleted, moved into or moved out of the specified directory. This
34 * mechanism works well, absent hard links. Hard links, unfortunately, break
35 * this rather badly, and the user is warned that watches on directories that
36 * have multiple directory entries referring to the same file may behave
37 * unexpectedly.
38 */
39
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/inotify.h>
43 #include <sys/fem.h>
44 #include <sys/conf.h>
45 #include <sys/stat.h>
46 #include <sys/vfs_opreg.h>
47 #include <sys/vmem.h>
48 #include <sys/avl.h>
49 #include <sys/sysmacros.h>
50 #include <sys/cyclic.h>
51 #include <sys/filio.h>
52
53 struct inotify_state;
54 struct inotify_kevent;
55
56 typedef struct inotify_watch inotify_watch_t;
57 typedef struct inotify_state inotify_state_t;
58 typedef struct inotify_kevent inotify_kevent_t;
59
/*
 * A watch on a vnode.  A watch is either a top-level watch explicitly added
 * by the user on a file or directory (inw_parent == NULL), or an implicit
 * child watch hung off a watched directory (inw_parent != NULL, with
 * inw_name holding the directory entry's name).  Watches are reference
 * counted; see inotify_watch_hold()/inotify_watch_release().
 */
struct inotify_watch {
	kmutex_t inw_lock;	/* lock protecting ref count */
	int inw_refcnt;		/* reference count */
	uint8_t inw_zombie:1;	/* boolean: is zombie */
	uint8_t inw_fired:1;	/* boolean: fired one-shot */
	uint8_t inw_active:1;	/* boolean: watch is active */
	uint8_t inw_orphaned:1;	/* boolean: orphaned */
	kcondvar_t inw_cv;	/* condvar for zombifier */
	uint32_t inw_mask;	/* mask of watch */
	int32_t inw_wd;		/* watch descriptor */
	vnode_t *inw_vp;	/* underlying vnode */
	inotify_watch_t *inw_parent;	/* parent, if a child */
	avl_node_t inw_byvp;	/* watches by vnode */
	avl_node_t inw_bywd;	/* watches by descriptor */
	avl_tree_t inw_children;	/* children, if a parent */
	char *inw_name;		/* name, if a child */
	list_node_t inw_orphan;	/* orphan list */
	cred_t *inw_cred;	/* cred, if orphaned */
	inotify_state_t *inw_state;	/* corresponding state */
};
80
/*
 * A queued inotify event.  The embedded struct inotify_event carries a
 * variable-length name payload immediately after it (ine_event.len bytes);
 * INOTIFY_EVENT_LENGTH() yields the total allocated size of the event.
 */
struct inotify_kevent {
	inotify_kevent_t *ine_next;	/* next event in queue */
	struct inotify_event ine_event;	/* event (variable size) */
};

#define	INOTIFY_EVENT_LENGTH(ev) \
	(sizeof (inotify_kevent_t) + (ev)->ine_event.len)
88
/*
 * Per-open-instance state for the inotify device.  Instances are kept on a
 * global list (rooted at inotify_state, below) and each instance's members
 * are protected by ins_lock.
 */
struct inotify_state {
	kmutex_t ins_lock;	/* lock protecting state */
	avl_tree_t ins_byvp;	/* watches by vnode */
	avl_tree_t ins_bywd;	/* watches by descriptor */
	vmem_t *ins_wds;	/* watch identifier arena */
	int ins_maxwatches;	/* maximum number of watches */
	int ins_maxevents;	/* maximum number of events */
	int ins_nevents;	/* current # of events */
	int32_t ins_size;	/* total size of events */
	inotify_kevent_t *ins_head;	/* head of event queue */
	inotify_kevent_t *ins_tail;	/* tail of event queue */
	pollhead_t ins_pollhd;	/* poll head */
	kcondvar_t ins_cv;	/* condvar for reading */
	list_t ins_orphans;	/* orphan list */
	ddi_periodic_t ins_cleaner;	/* cyclic for cleaning */
	inotify_watch_t *ins_zombies;	/* zombie watch list */
	cred_t *ins_cred;	/* creator's credentials */
	inotify_state_t *ins_next;	/* next state on global list */
};
108
109 /*
110 * Tunables (exported read-only in lx-branded zones via /proc).
111 */
112 int inotify_maxwatches = 8192; /* max watches per instance */
113 int inotify_maxevents = 16384; /* max events */
114 int inotify_maxinstances = 128; /* max instances per user */
115
116 /*
117 * Internal global variables.
118 */
119 static kmutex_t inotify_lock; /* lock protecting state */
120 static dev_info_t *inotify_devi; /* device info */
121 static fem_t *inotify_femp; /* FEM pointer */
122 static vmem_t *inotify_minor; /* minor number arena */
123 static void *inotify_softstate; /* softstate pointer */
static inotify_state_t *inotify_state;	/* global list of state */
125
126 static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
127 static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
128 static void inotify_watch_delete(inotify_watch_t *, uint32_t);
129 static void inotify_watch_remove(inotify_state_t *state,
130 inotify_watch_t *watch);
131
132 static int
133 inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
134 cred_t *cr, caller_context_t *ct)
135 {
136 inotify_watch_t *watch = vf->fa_fnode->fn_available;
137 int rval;
138
139 if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
140 inotify_watch_event(watch, flag & FWRITE ?
141 IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
142 }
143
144 return (rval);
145 }
146
147 static int
148 inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
149 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
150 vsecattr_t *vsecp)
151 {
152 inotify_watch_t *watch = vf->fa_fnode->fn_available;
153 int rval;
154
155 if ((rval = vnext_create(vf, name, vap, excl, mode,
156 vpp, cr, flag, ct, vsecp)) == 0) {
157 inotify_watch_insert(watch, *vpp, name);
158 inotify_watch_event(watch, IN_CREATE, name);
159 }
160
161 return (rval);
162 }
163
164 static int
165 inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
166 caller_context_t *ct, int flags)
167 {
168 inotify_watch_t *watch = vf->fa_fnode->fn_available;
169 int rval;
170
171 if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
172 inotify_watch_insert(watch, svp, tnm);
173 inotify_watch_event(watch, IN_CREATE, tnm);
174 }
175
176 return (rval);
177 }
178
179 static int
180 inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
181 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
182 {
183 inotify_watch_t *watch = vf->fa_fnode->fn_available;
184 int rval;
185
186 if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
187 ct, flags, vsecp)) == 0) {
188 inotify_watch_insert(watch, *vpp, name);
189 inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
190 }
191
192 return (rval);
193 }
194
195 static int
196 inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
197 {
198 inotify_watch_t *watch = vf->fa_fnode->fn_available;
199 int rval;
200
201 if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
202 inotify_watch_event(watch, IN_OPEN, NULL);
203
204 return (rval);
205 }
206
207 static int
208 inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
209 caller_context_t *ct)
210 {
211 inotify_watch_t *watch = vf->fa_fnode->fn_available;
212 int rval = vnext_read(vf, uiop, ioflag, cr, ct);
213 inotify_watch_event(watch, IN_ACCESS, NULL);
214
215 return (rval);
216 }
217
218 static int
219 inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
220 caller_context_t *ct, int flags)
221 {
222 inotify_watch_t *watch = vf->fa_fnode->fn_available;
223 int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
224 inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
225
226 return (rval);
227 }
228
229 int
230 inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
231 int flags)
232 {
233 inotify_watch_t *watch = vf->fa_fnode->fn_available;
234 int rval;
235
236 if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
237 inotify_watch_event(watch, IN_DELETE, nm);
238
239 return (rval);
240 }
241
242 int
243 inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
244 caller_context_t *ct, int flags)
245 {
246 inotify_watch_t *watch = vf->fa_fnode->fn_available;
247 int rval;
248
249 if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
250 inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
251
252 return (rval);
253 }
254
255 static int
256 inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
257 caller_context_t *ct)
258 {
259 inotify_watch_t *watch = vf->fa_fnode->fn_available;
260 int rval;
261
262 if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
263 inotify_watch_event(watch, IN_ATTRIB, NULL);
264
265 return (rval);
266 }
267
268 static int
269 inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
270 caller_context_t *ct)
271 {
272 inotify_watch_t *watch = vf->fa_fnode->fn_available;
273 int rval = vnext_write(vf, uiop, ioflag, cr, ct);
274 inotify_watch_event(watch, IN_MODIFY, NULL);
275
276 return (rval);
277 }
278
/*
 * FEM monitor for VOP_VNEVENT:  translate namespace-level vnode events
 * (rename, remove, rmdir, create, resize, link) into inotify events on
 * this watch, deleting or inserting watches as the namespace changes.
 */
static int
inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	inotify_watch_t *watch = vf->fa_fnode->fn_available;

	switch (vnevent) {
	case VE_RENAME_SRC:
		/* The watched file itself was renamed away. */
		inotify_watch_event(watch, IN_MOVE_SELF, NULL);
		inotify_watch_delete(watch, IN_MOVE_SELF);
		break;
	case VE_REMOVE:
		/*
		 * Linux will apparently fire an IN_ATTRIB event when the link
		 * count changes (including when it drops to 0 on a remove).
		 * This is merely somewhat odd; what is amazing is that this
		 * IN_ATTRIB event is not visible on an inotify watch on the
		 * parent directory. (IN_ATTRIB events are normally sent to
		 * watches on the parent directory). While it's hard to
		 * believe that this constitutes desired semantics, ltp
		 * unfortunately tests this case (if implicitly); in the name
		 * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
		 * explicitly watching the file that has been removed.
		 */
		if (watch->inw_parent == NULL)
			inotify_watch_event(watch, IN_ATTRIB, NULL);

		/*FALLTHROUGH*/
	case VE_RENAME_DEST:
		/* The watched file was removed or renamed over. */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_RMDIR:
		/*
		 * It seems that IN_ISDIR should really be OR'd in here, but
		 * Linux doesn't seem to do that in this case; for the sake of
		 * bug-for-bug compatibility, we don't do it either.
		 */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_CREATE:
	case VE_TRUNCATE:
	case VE_RESIZE:
		/* Content and attributes both change on these events. */
		inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
		break;
	case VE_LINK:
		/* A new hard link changes the link count. */
		inotify_watch_event(watch, IN_ATTRIB, NULL);
		break;
	case VE_RENAME_SRC_DIR:
		/* An entry was renamed out of this watched directory. */
		inotify_watch_event(watch, IN_MOVED_FROM, name);
		break;
	case VE_RENAME_DEST_DIR:
		/* An entry was renamed into this watched directory. */
		if (name == NULL)
			name = dvp->v_path;

		inotify_watch_insert(watch, dvp, name);
		inotify_watch_event(watch, IN_MOVED_TO, name);
		break;
	case VE_SUPPORT:
	case VE_MOUNTEDOVER:
	case VE_PRE_RENAME_SRC:
	case VE_PRE_RENAME_DEST:
	case VE_PRE_RENAME_DEST_DIR:
		/* No inotify analogue; pass these through untouched. */
		break;
	}

	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
}
348
/*
 * FEM template:  the set of vnode operations monitored on watched vnodes.
 * Each entry points at the corresponding inotify_fop_* monitor above.
 */
const fs_operation_def_t inotify_vnodesrc_template[] = {
	VOPNAME_CLOSE, { .femop_close = inotify_fop_close },
	VOPNAME_CREATE, { .femop_create = inotify_fop_create },
	VOPNAME_LINK, { .femop_link = inotify_fop_link },
	VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir },
	VOPNAME_OPEN, { .femop_open = inotify_fop_open },
	VOPNAME_READ, { .femop_read = inotify_fop_read },
	VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir },
	VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove },
	VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir },
	VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr },
	VOPNAME_WRITE, { .femop_write = inotify_fop_write },
	VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent },
	NULL, NULL
};
364
365 static int
366 inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
367 {
368 if (lhs->inw_wd < rhs->inw_wd)
369 return (-1);
370
371 if (lhs->inw_wd > rhs->inw_wd)
372 return (1);
373
374 return (0);
375 }
376
377 static int
378 inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
379 {
380 uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
381
382 if (lvp < rvp)
383 return (-1);
384
385 if (lvp > rvp)
386 return (1);
387
388 return (0);
389 }
390
/*
 * Take an additional reference on a watch.  The caller must already be
 * guaranteed at least one reference (verified below); this is also used as
 * the FEM hold callback installed in inotify_watch_add().
 */
static void
inotify_watch_hold(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 0);
	watch->inw_refcnt++;
	mutex_exit(&watch->inw_lock);
}
399
/*
 * Release a reference on a watch.  The count never drops below one here
 * (verified below); the final reference is owned by the state's data
 * structures (or the zombie list) and is dropped elsewhere.  When a
 * zombified watch drains to that final reference, signal the condvar so
 * it can be reaped.
 */
static void
inotify_watch_release(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 1);

	if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
		/*
		 * We're down to our last reference; kick anyone that might be
		 * waiting.
		 */
		cv_signal(&watch->inw_cv);
	}

	mutex_exit(&watch->inw_lock);
}
416
417 static void
418 inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
419 {
420 inotify_kevent_t *event, *tail;
421 inotify_state_t *state = watch->inw_state;
422 uint32_t wd = watch->inw_wd, cookie = 0, len;
423 boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
424 inotify_watch_t *source = watch;
425
426 if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
427 return;
428
429 if (watch->inw_parent != NULL) {
430 /*
431 * This is an event on the child; if this isn't a valid child
432 * event, return. Otherwise, we move our watch to be our
433 * parent (which we know is around because we have a hold on
434 * it) and continue.
435 */
436 if (!(mask & IN_CHILD_EVENTS))
437 return;
438
439 name = watch->inw_name;
440 watch = watch->inw_parent;
441 wd = watch->inw_wd;
442 }
443
444 if (!removal) {
445 mutex_enter(&state->ins_lock);
446
447 if (watch->inw_zombie ||
448 watch->inw_fired || !watch->inw_active) {
449 mutex_exit(&state->ins_lock);
450 return;
451 }
452 } else {
453 if (!watch->inw_active)
454 return;
455
456 VERIFY(MUTEX_HELD(&state->ins_lock));
457 }
458
459 /*
460 * If this is an operation on a directory and it's a child event
461 * (event if it's not on a child), we specify IN_ISDIR.
462 */
463 if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
464 mask |= IN_ISDIR;
465
466 if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
467 cookie = (uint32_t)curthread->t_did;
468
469 if (state->ins_nevents >= state->ins_maxevents) {
470 /*
471 * We're at our maximum number of events -- turn our event
472 * into an IN_Q_OVERFLOW event, which will be coalesced if
473 * it's already the tail event.
474 */
475 mask = IN_Q_OVERFLOW;
476 wd = (uint32_t)-1;
477 cookie = 0;
478 len = 0;
479 }
480
481 if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
482 tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
483 ((tail->ine_event.len == 0 && len == 0) ||
484 (name != NULL && tail->ine_event.len != 0 &&
485 strcmp(tail->ine_event.name, name) == 0))) {
486 /*
487 * This is an implicitly coalesced event; we're done.
488 */
489 if (!removal)
490 mutex_exit(&state->ins_lock);
491 return;
492 }
493
494 if (name != NULL) {
495 len = strlen(name) + 1;
496 len = roundup(len, sizeof (struct inotify_event));
497 } else {
498 len = 0;
499 }
500
501 event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
502 event->ine_event.wd = wd;
503 event->ine_event.mask = (uint32_t)mask;
504 event->ine_event.cookie = cookie;
505 event->ine_event.len = len;
506
507 if (name != NULL)
508 (void) strcpy(event->ine_event.name, name);
509
510 if (tail != NULL) {
511 tail->ine_next = event;
512 } else {
513 VERIFY(state->ins_head == NULL);
514 state->ins_head = event;
515 cv_broadcast(&state->ins_cv);
516 }
517
518 state->ins_tail = event;
519 state->ins_nevents++;
520 state->ins_size += sizeof (event->ine_event) + len;
521
522 if (removal)
523 return;
524
525 if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
526 /*
527 * If this is a one-shot, we need to remove the watch. (Note
528 * that this will recurse back into inotify_watch_event() to
529 * fire the IN_IGNORED event -- but with "removal" set.)
530 */
531 watch->inw_fired = 1;
532 inotify_watch_remove(state, watch);
533 }
534
535 mutex_exit(&state->ins_lock);
536 pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
537 }
538
539 /*
540 * Destroy a watch. By the time we're in here, the watch must have exactly
541 * one reference.
542 */
543 static void
544 inotify_watch_destroy(inotify_watch_t *watch)
545 {
546 VERIFY(MUTEX_HELD(&watch->inw_lock));
547
548 if (watch->inw_name != NULL)
549 kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);
550
551 kmem_free(watch, sizeof (inotify_watch_t));
552 }
553
554 /*
555 * Zombify a watch. By the time we come in here, it must be true that the
556 * watch has already been fem_uninstall()'d -- the only reference should be
557 * in the state's data structure. If we can get away with freeing it, we'll
558 * do that -- but if the reference count is greater than one due to an active
559 * vnode operation, we'll put this watch on the zombie list on the state
560 * structure.
561 */
562 static void
563 inotify_watch_zombify(inotify_watch_t *watch)
564 {
565 inotify_state_t *state = watch->inw_state;
566
567 VERIFY(MUTEX_HELD(&state->ins_lock));
568 VERIFY(!watch->inw_zombie);
569
570 watch->inw_zombie = 1;
571
572 if (watch->inw_parent != NULL) {
573 inotify_watch_release(watch->inw_parent);
574 } else {
575 avl_remove(&state->ins_byvp, watch);
576 avl_remove(&state->ins_bywd, watch);
577 vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
578 watch->inw_wd = -1;
579 }
580
581 mutex_enter(&watch->inw_lock);
582
583 if (watch->inw_refcnt == 1) {
584 /*
585 * There are no operations in flight and there is no way
586 * for anyone to discover this watch -- we can destroy it.
587 */
588 inotify_watch_destroy(watch);
589 } else {
590 /*
591 * There are operations in flight; we will need to enqueue
592 * this for later destruction.
593 */
594 watch->inw_parent = state->ins_zombies;
595 state->ins_zombies = watch;
596 mutex_exit(&watch->inw_lock);
597 }
598 }
599
/*
 * Allocate a new watch on vp with the given event mask and install the FEM
 * monitor on the vnode.  With parent == NULL this creates a top-level
 * watch:  a watch descriptor is allocated from the instance's id arena and
 * the watch is entered into both per-instance AVL trees.  Otherwise this
 * creates a child watch named "name" under parent; only the events in
 * IN_CHILD_EVENTS are retained in its mask, and a hold is taken on the
 * parent.  The caller must hold ins_lock and must have placed a hold on vp
 * on the watch's behalf.
 */
static inotify_watch_t *
inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
    const char *name, vnode_t *vp, uint32_t mask)
{
	inotify_watch_t *watch;
	int err;

	VERIFY(MUTEX_HELD(&state->ins_lock));

	watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);

	watch->inw_vp = vp;
	watch->inw_mask = mask;
	watch->inw_state = state;
	watch->inw_refcnt = 1;	/* initial reference owned by the state */

	if (parent == NULL) {
		/* Top-level watch:  allocate a watch descriptor. */
		watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
		    1, VM_BESTFIT | VM_SLEEP);
		avl_add(&state->ins_byvp, watch);
		avl_add(&state->ins_bywd, watch);

		/* Children of this watch are kept sorted by vnode. */
		avl_create(&watch->inw_children,
		    (int(*)(const void *, const void *))inotify_watch_cmpvp,
		    sizeof (inotify_watch_t),
		    offsetof(inotify_watch_t, inw_byvp));
	} else {
		VERIFY(name != NULL);
		inotify_watch_hold(parent);
		watch->inw_mask &= IN_CHILD_EVENTS;
		watch->inw_parent = parent;
		watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
		/* strcpy() is safe, because strlen(name) bounds us. */
		(void) strcpy(watch->inw_name, name);

		avl_add(&parent->inw_children, watch);
	}

	/*
	 * Add our monitor to the vnode. We must not have the watch lock held
	 * when we do this, as it will immediately hold our watch.
	 */
	err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
	    (void (*)(void *))inotify_watch_hold,
	    (void (*)(void *))inotify_watch_release);

	VERIFY(err == 0);

	return (watch);
}
650
651 /*
652 * Remove a (non-child) watch. This is called from either synchronous context
653 * via inotify_rm_watch() or monitor context via either a vnevent or a
654 * one-shot.
655 */
656 static void
657 inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
658 {
659 inotify_watch_t *child;
660 int err;
661
662 VERIFY(MUTEX_HELD(&state->ins_lock));
663 VERIFY(watch->inw_parent == NULL);
664
665 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
666 VERIFY(err == 0);
667
668 /*
669 * If we have children, we're going to remove them all and set them
670 * all to be zombies.
671 */
672 while ((child = avl_first(&watch->inw_children)) != NULL) {
673 VERIFY(child->inw_parent == watch);
674 avl_remove(&watch->inw_children, child);
675
676 err = fem_uninstall(child->inw_vp, inotify_femp, child);
677 VERIFY(err == 0);
678
679 /*
680 * If this child watch has been orphaned, remove it from the
681 * state's list of orphans.
682 */
683 if (child->inw_orphaned) {
684 list_remove(&state->ins_orphans, child);
685 crfree(child->inw_cred);
686 }
687
688 VN_RELE(child->inw_vp);
689
690 /*
691 * We're down (or should be down) to a single reference to
692 * this child watch; it's safe to zombify it.
693 */
694 inotify_watch_zombify(child);
695 }
696
697 inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
698 VN_RELE(watch->inw_vp);
699
700 /*
701 * It's now safe to zombify the watch -- we know that the only reference
702 * can come from operations in flight.
703 */
704 inotify_watch_zombify(watch);
705 }
706
707 /*
708 * Delete a watch. Should only be called from VOP context.
709 */
710 static void
711 inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
712 {
713 inotify_state_t *state = watch->inw_state;
714 inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
715 int err;
716
717 if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
718 return;
719
720 mutex_enter(&state->ins_lock);
721
722 if (watch->inw_zombie) {
723 mutex_exit(&state->ins_lock);
724 return;
725 }
726
727 if ((parent = watch->inw_parent) == NULL) {
728 if (event == IN_DELETE_SELF) {
729 /*
730 * If we're here because we're being deleted and we
731 * are not a child watch, we need to delete the entire
732 * watch, children and all.
733 */
734 inotify_watch_remove(state, watch);
735 }
736
737 mutex_exit(&state->ins_lock);
738 return;
739 } else {
740 if (event == IN_DELETE_SELF &&
741 !(parent->inw_mask & IN_EXCL_UNLINK)) {
742 /*
743 * This is a child watch for a file that is being
744 * removed and IN_EXCL_UNLINK has not been specified;
745 * indicate that it is orphaned and add it to the list
746 * of orphans. (This list will be checked by the
747 * cleaning cyclic to determine when the watch has
748 * become the only hold on the vnode, at which point
749 * the watch can be zombified.) Note that we check
750 * if the watch is orphaned before we orphan it: hard
751 * links make it possible for VE_REMOVE to be called
752 * multiple times on the same vnode. (!)
753 */
754 if (!watch->inw_orphaned) {
755 watch->inw_orphaned = 1;
756 watch->inw_cred = CRED();
757 crhold(watch->inw_cred);
758 list_insert_head(&state->ins_orphans, watch);
759 }
760
761 mutex_exit(&state->ins_lock);
762 return;
763 }
764
765 if (watch->inw_orphaned) {
766 /*
767 * If we're here, a file was orphaned and then later
768 * moved -- which almost certainly means that hard
769 * links are on the scene. We choose the orphan over
770 * the move because we don't want to spuriously
771 * drop events if we can avoid it.
772 */
773 crfree(watch->inw_cred);
774 list_remove(&state->ins_orphans, watch);
775 }
776 }
777
778 if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
779 /*
780 * This watch has already been deleted from the parent.
781 */
782 mutex_exit(&state->ins_lock);
783 return;
784 }
785
786 avl_remove(&parent->inw_children, watch);
787 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
788 VERIFY(err == 0);
789
790 VN_RELE(watch->inw_vp);
791
792 /*
793 * It's now safe to zombify the watch -- which won't actually delete
794 * it as we know that the reference count is greater than 1.
795 */
796 inotify_watch_zombify(watch);
797 mutex_exit(&state->ins_lock);
798 }
799
800 /*
801 * Insert a new child watch. Should only be called from VOP context when
802 * a child is created in a watched directory.
803 */
804 static void
805 inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
806 {
807 inotify_state_t *state = watch->inw_state;
808 inotify_watch_t cmp = { .inw_vp = vp };
809
810 if (!(watch->inw_mask & IN_CHILD_EVENTS))
811 return;
812
813 mutex_enter(&state->ins_lock);
814
815 if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
816 mutex_exit(&state->ins_lock);
817 return;
818 }
819
820 if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
821 mutex_exit(&state->ins_lock);
822 return;
823 }
824
825 VN_HOLD(vp);
826 watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
827 VERIFY(watch != NULL);
828
829 mutex_exit(&state->ins_lock);
830 }
831
832
/*
 * Add a watch on the specified vnode for this inotify instance.  On
 * success, stores the watch descriptor through wdp and returns 0; returns
 * ENOSPC if the instance has reached its watch limit.  If a watch already
 * exists on the vnode, its mask is augmented -- or, absent IN_MASK_ADD,
 * replaced.
 */
static int
inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
    int32_t *wdp)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	uint32_t set;

	/* Restrict to known event/modifier bits; always keep unmaskable. */
	set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;

	/*
	 * Lookup our vnode to determine if we already have a watch on it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * We don't have this watch; allocate a new one, provided that
		 * we have fewer than our limit.
		 */
		if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
			mutex_exit(&state->ins_lock);
			return (ENOSPC);
		}

		/* The new watch owns this hold on vp. */
		VN_HOLD(vp);
		watch = inotify_watch_add(state, NULL, NULL, vp, set);
		*wdp = watch->inw_wd;
		mutex_exit(&state->ins_lock);

		return (0);
	}

	VERIFY(!watch->inw_zombie);

	if (!(mask & IN_MASK_ADD)) {
		/*
		 * Note that if we're resetting our event mask and we're
		 * transitioning from an event mask that includes child events
		 * to one that doesn't, there will be potentially some stale
		 * child watches. This is basically fine: they won't fire,
		 * and they will correctly be removed when the watch is
		 * removed.
		 */
		watch->inw_mask = 0;
	}

	watch->inw_mask |= set;

	*wdp = watch->inw_wd;

	mutex_exit(&state->ins_lock);

	return (0);
}
887
/*
 * Add a child watch for the named entry within the watched directory vp.
 * Returns EINVAL if name contains a path separator, the lookup error if
 * the entry cannot be resolved, ENXIO if vp is not actually watched by
 * this instance, and 0 otherwise (including when the child watch already
 * exists).
 */
static int
inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	vnode_t *cvp;
	int err;

	/*
	 * Verify that the specified child doesn't have a directory component
	 * within it.
	 */
	if (strchr(name, '/') != NULL)
		return (EINVAL);

	/*
	 * Lookup the underlying file. Note that this will succeed even if
	 * we don't have permissions to actually read the file.
	 */
	if ((err = lookupnameat(name,
	    UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
		return (err);
	}

	/*
	 * Use our vnode to find our watch, and then add our child watch to it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * This is unexpected -- it means that we don't have the
		 * watch that we thought we had.
		 */
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (ENXIO);
	}

	/*
	 * Now lookup the child vnode in the watch; we'll only add it if it
	 * isn't already there.
	 */
	cmp.inw_vp = cvp;

	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (0);
	}

	/* The lookup's hold on cvp is inherited by the new child watch. */
	watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
	VERIFY(watch != NULL);
	mutex_exit(&state->ins_lock);

	return (0);
}
944
945 static int
946 inotify_rm_watch(inotify_state_t *state, int32_t wd)
947 {
948 inotify_watch_t *watch, cmp = { .inw_wd = wd };
949
950 mutex_enter(&state->ins_lock);
951
952 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
953 mutex_exit(&state->ins_lock);
954 return (EINVAL);
955 }
956
957 inotify_watch_remove(state, watch);
958 mutex_exit(&state->ins_lock);
959
960 return (0);
961 }
962
963 static int
964 inotify_activate(inotify_state_t *state, int32_t wd)
965 {
966 inotify_watch_t *watch, cmp = { .inw_wd = wd };
967
968 mutex_enter(&state->ins_lock);
969
970 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
971 mutex_exit(&state->ins_lock);
972 return (EINVAL);
973 }
974
975 watch->inw_active = 1;
976
977 mutex_exit(&state->ins_lock);
978
979 return (0);
980 }
981
982 /*
983 * Called periodically as a cyclic to process the orphans and zombies.
984 */
985 static void
986 inotify_clean(void *arg)
987 {
988 inotify_state_t *state = arg;
989 inotify_watch_t *watch, *parent, *next, **prev;
990 cred_t *savecred;
991 int err;
992
993 mutex_enter(&state->ins_lock);
994
995 for (watch = list_head(&state->ins_orphans);
996 watch != NULL; watch = next) {
997 next = list_next(&state->ins_orphans, watch);
998
999 VERIFY(!watch->inw_zombie);
1000 VERIFY((parent = watch->inw_parent) != NULL);
1001
1002 if (watch->inw_vp->v_count > 1)
1003 continue;
1004
1005 avl_remove(&parent->inw_children, watch);
1006 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
1007 VERIFY(err == 0);
1008
1009 list_remove(&state->ins_orphans, watch);
1010
1011 /*
1012 * For purposes of releasing the vnode, we need to switch our
1013 * cred to be the cred of the orphaning thread (which we held
1014 * at the time this watch was orphaned).
1015 */
1016 savecred = curthread->t_cred;
1017 curthread->t_cred = watch->inw_cred;
1018 VN_RELE(watch->inw_vp);
1019 crfree(watch->inw_cred);
1020 curthread->t_cred = savecred;
1021
1022 inotify_watch_zombify(watch);
1023 }
1024
1025 prev = &state->ins_zombies;
1026
1027 while ((watch = *prev) != NULL) {
1028 mutex_enter(&watch->inw_lock);
1029
1030 if (watch->inw_refcnt == 1) {
1031 *prev = watch->inw_parent;
1032 inotify_watch_destroy(watch);
1033 continue;
1034 }
1035
1036 prev = &watch->inw_parent;
1037 mutex_exit(&watch->inw_lock);
1038 }
1039
1040 mutex_exit(&state->ins_lock);
1041 }
1042
1043 /*ARGSUSED*/
1044 static int
1045 inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
1046 {
1047 inotify_state_t *state;
1048 major_t major = getemajor(*devp);
1049 minor_t minor = getminor(*devp);
1050 int instances = 0;
1051 char c[64];
1052
1053 if (minor != INOTIFYMNRN_INOTIFY)
1054 return (ENXIO);
1055
1056 mutex_enter(&inotify_lock);
1057
1058 for (state = inotify_state; state != NULL; state = state->ins_next) {
1059 if (state->ins_cred == cred_p)
1060 instances++;
1061 }
1062
1063 if (instances >= inotify_maxinstances) {
1064 mutex_exit(&inotify_lock);
1065 return (EMFILE);
1066 }
1067
1068 minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
1069 VM_BESTFIT | VM_SLEEP);
1070
1071 if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
1072 vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1073 mutex_exit(&inotify_lock);
1074 return (NULL);
1075 }
1076
1077 state = ddi_get_soft_state(inotify_softstate, minor);
1078 *devp = makedevice(major, minor);
1079
1080 crhold(cred_p);
1081 state->ins_cred = cred_p;
1082 state->ins_next = inotify_state;
1083 inotify_state = state;
1084
1085 (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
1086 state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
1087 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
1088
1089 avl_create(&state->ins_bywd,
1090 (int(*)(const void *, const void *))inotify_watch_cmpwd,
1091 sizeof (inotify_watch_t),
1092 offsetof(inotify_watch_t, inw_bywd));
1093
1094 avl_create(&state->ins_byvp,
1095 (int(*)(const void *, const void *))inotify_watch_cmpvp,
1096 sizeof (inotify_watch_t),
1097 offsetof(inotify_watch_t, inw_byvp));
1098
1099 list_create(&state->ins_orphans, sizeof (inotify_watch_t),
1100 offsetof(inotify_watch_t, inw_orphan));
1101
1102 state->ins_maxwatches = inotify_maxwatches;
1103 state->ins_maxevents = inotify_maxevents;
1104
1105 mutex_exit(&inotify_lock);
1106
1107 state->ins_cleaner = ddi_periodic_add(inotify_clean,
1108 state, NANOSEC, DDI_IPL_0);
1109
1110 return (0);
1111 }
1112
/*ARGSUSED*/
static int
inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	inotify_state_t *state;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);
	int err = 0, nevents = 0;
	size_t len;

	state = ddi_get_soft_state(inotify_softstate, minor);

	mutex_enter(&state->ins_lock);

	/*
	 * Block (interruptibly) until at least one event is queued -- or
	 * fail immediately with EAGAIN for non-blocking opens.
	 */
	while (state->ins_head == NULL) {
		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
			mutex_exit(&state->ins_lock);
			return (EAGAIN);
		}

		/*
		 * cv_wait_sig_swap() returns 0 if the wait was interrupted
		 * by a signal, in which case we return EINTR.
		 */
		if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
			mutex_exit(&state->ins_lock);
			return (EINTR);
		}
	}

	/*
	 * We have events and we have our lock; return as many as we can.
	 */
	while ((event = state->ins_head) != NULL) {
		/*
		 * Each event occupies its fixed header plus a variable
		 * length name payload.
		 */
		len = sizeof (event->ine_event) + event->ine_event.len;

		if (uio->uio_resid < len) {
			/*
			 * If even the first event doesn't fit in the
			 * caller's buffer, fail with EINVAL; otherwise we
			 * simply stop after the events already copied.
			 */
			if (nevents == 0)
				err = EINVAL;
			break;
		}

		nevents++;

		/*
		 * Copy out before dequeueing: on a uiomove() failure the
		 * event remains at the head of the queue for a retry.
		 */
		if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
			break;

		VERIFY(state->ins_nevents > 0);
		state->ins_nevents--;

		VERIFY(state->ins_size > 0);
		state->ins_size -= len;

		/*
		 * Dequeue the event; if the queue is now empty, the tail
		 * must have pointed at this event and the count must be 0.
		 */
		if ((state->ins_head = event->ine_next) == NULL) {
			VERIFY(event == state->ins_tail);
			VERIFY(state->ins_nevents == 0);
			state->ins_tail = NULL;
		}

		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	mutex_exit(&state->ins_lock);

	return (err);
}
1175
1176 /*ARGSUSED*/
1177 static int
1178 inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
1179 struct pollhead **phpp)
1180 {
1181 inotify_state_t *state;
1182 minor_t minor = getminor(dev);
1183
1184 state = ddi_get_soft_state(inotify_softstate, minor);
1185
1186 mutex_enter(&state->ins_lock);
1187
1188 if (state->ins_head != NULL) {
1189 *reventsp = events & (POLLRDNORM | POLLIN);
1190 } else {
1191 *reventsp = 0;
1192
1193 if (!anyyet)
1194 *phpp = &state->ins_pollhd;
1195 }
1196
1197 mutex_exit(&state->ins_lock);
1198
1199 return (0);
1200 }
1201
1202 /*ARGSUSED*/
1203 static int
1204 inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
1205 {
1206 inotify_state_t *state;
1207 minor_t minor = getminor(dev);
1208 file_t *fp;
1209 int rval;
1210
1211 state = ddi_get_soft_state(inotify_softstate, minor);
1212
1213 switch (cmd) {
1214 case INOTIFYIOC_ADD_WATCH: {
1215 inotify_addwatch_t addwatch;
1216 file_t *fp;
1217
1218 if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
1219 return (EFAULT);
1220
1221 if ((fp = getf(addwatch.inaw_fd)) == NULL)
1222 return (EBADF);
1223
1224 rval = inotify_add_watch(state, fp->f_vnode,
1225 addwatch.inaw_mask, rv);
1226
1227 releasef(addwatch.inaw_fd);
1228 return (rval);
1229 }
1230
1231 case INOTIFYIOC_ADD_CHILD: {
1232 inotify_addchild_t addchild;
1233 char name[MAXPATHLEN];
1234
1235 if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
1236 return (EFAULT);
1237
1238 if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
1239 return (EFAULT);
1240
1241 if ((fp = getf(addchild.inac_fd)) == NULL)
1242 return (EBADF);
1243
1244 rval = inotify_add_child(state, fp->f_vnode, name);
1245
1246 releasef(addchild.inac_fd);
1247 return (rval);
1248 }
1249
1250 case INOTIFYIOC_RM_WATCH:
1251 return (inotify_rm_watch(state, arg));
1252
1253 case INOTIFYIOC_ACTIVATE:
1254 return (inotify_activate(state, arg));
1255
1256 case FIONREAD: {
1257 int32_t size;
1258
1259 mutex_enter(&state->ins_lock);
1260 size = state->ins_size;
1261 mutex_exit(&state->ins_lock);
1262
1263 if (copyout(&size, (void *)arg, sizeof (size)) != 0)
1264 return (EFAULT);
1265
1266 return (0);
1267 }
1268
1269 default:
1270 break;
1271 }
1272
1273 return (ENOTTY);
1274 }
1275
/*ARGSUSED*/
static int
inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	inotify_state_t *state, **sp;
	inotify_watch_t *watch, *zombies;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);

	state = ddi_get_soft_state(inotify_softstate, minor);

	/*
	 * Wake any pollers with POLLERR and clean up the pollhead before
	 * tearing down the rest of the instance.
	 */
	if (state->ins_pollhd.ph_list != NULL) {
		pollwakeup(&state->ins_pollhd, POLLERR);
		pollhead_clean(&state->ins_pollhd);
	}

	mutex_enter(&state->ins_lock);

	/*
	 * First, destroy all of our watches.
	 */
	while ((watch = avl_first(&state->ins_bywd)) != NULL)
		inotify_watch_remove(state, watch);

	/*
	 * And now destroy our event queue.
	 */
	while ((event = state->ins_head) != NULL) {
		state->ins_head = event->ine_next;
		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	/*
	 * Take the zombie list private (the list is singly linked through
	 * inw_parent) so it can be processed after dropping ins_lock.
	 */
	zombies = state->ins_zombies;
	state->ins_zombies = NULL;
	mutex_exit(&state->ins_lock);

	/*
	 * Now that our state lock is dropped, we can synchronously wait on
	 * any zombies.
	 */
	while ((watch = zombies) != NULL) {
		zombies = zombies->inw_parent;

		mutex_enter(&watch->inw_lock);

		/*
		 * Wait for the reference count to drain to our own hold
		 * before destroying the watch.
		 */
		while (watch->inw_refcnt > 1)
			cv_wait(&watch->inw_cv, &watch->inw_lock);

		inotify_watch_destroy(watch);
	}

	/*
	 * Stop the periodic cleaner for this instance.
	 */
	if (state->ins_cleaner != NULL) {
		ddi_periodic_delete(state->ins_cleaner);
		state->ins_cleaner = NULL;
	}

	mutex_enter(&inotify_lock);

	/*
	 * Remove our state from our global list, and release our hold on
	 * the cred.
	 */
	for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
		VERIFY(*sp != NULL);

	*sp = (*sp)->ins_next;
	crfree(state->ins_cred);
	vmem_destroy(state->ins_wds);

	/*
	 * Finally, free the soft state and return the clone minor to the
	 * arena.
	 */
	ddi_soft_state_free(inotify_softstate, minor);
	vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);

	mutex_exit(&inotify_lock);

	return (0);
}
1352
1353 /*ARGSUSED*/
1354 static int
1355 inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1356 {
1357 mutex_enter(&inotify_lock);
1358
1359 if (ddi_soft_state_init(&inotify_softstate,
1360 sizeof (inotify_state_t), 0) != 0) {
1361 cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
1362 mutex_exit(&inotify_lock);
1363 return (DDI_FAILURE);
1364 }
1365
1366 if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
1367 INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
1368 cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
1369 ddi_soft_state_fini(&inotify_softstate);
1370 mutex_exit(&inotify_lock);
1371 return (DDI_FAILURE);
1372 }
1373
1374 if (fem_create("inotify_fem",
1375 inotify_vnodesrc_template, &inotify_femp) != 0) {
1376 cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
1377 ddi_remove_minor_node(devi, NULL);
1378 ddi_soft_state_fini(&inotify_softstate);
1379 mutex_exit(&inotify_lock);
1380 return (DDI_FAILURE);
1381 }
1382
1383 ddi_report_dev(devi);
1384 inotify_devi = devi;
1385
1386 inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
1387 UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
1388 VM_SLEEP | VMC_IDENTIFIER);
1389
1390 mutex_exit(&inotify_lock);
1391
1392 return (DDI_SUCCESS);
1393 }
1394
1395 /*ARGSUSED*/
1396 static int
1397 inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1398 {
1399 switch (cmd) {
1400 case DDI_DETACH:
1401 break;
1402
1403 case DDI_SUSPEND:
1404 return (DDI_SUCCESS);
1405
1406 default:
1407 return (DDI_FAILURE);
1408 }
1409
1410 mutex_enter(&inotify_lock);
1411 fem_free(inotify_femp);
1412 vmem_destroy(inotify_minor);
1413
1414 ddi_remove_minor_node(inotify_devi, NULL);
1415 inotify_devi = NULL;
1416
1417 ddi_soft_state_fini(&inotify_softstate);
1418 mutex_exit(&inotify_lock);
1419
1420 return (DDI_SUCCESS);
1421 }
1422
1423 /*ARGSUSED*/
1424 static int
1425 inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1426 {
1427 int error;
1428
1429 switch (infocmd) {
1430 case DDI_INFO_DEVT2DEVINFO:
1431 *result = (void *)inotify_devi;
1432 error = DDI_SUCCESS;
1433 break;
1434 case DDI_INFO_DEVT2INSTANCE:
1435 *result = (void *)0;
1436 error = DDI_SUCCESS;
1437 break;
1438 default:
1439 error = DDI_FAILURE;
1440 }
1441 return (error);
1442 }
1443
/*
 * Character device entry points for /dev/inotify.
 */
static struct cb_ops inotify_cb_ops = {
	inotify_open,		/* open */
	inotify_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	inotify_read,		/* read */
	nodev,			/* write */
	inotify_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	inotify_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
1461
/*
 * Device operations vector; only getinfo, attach and detach are
 * interesting -- everything else routes through inotify_cb_ops.
 */
static struct dev_ops inotify_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	inotify_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	inotify_attach,		/* attach */
	inotify_detach,		/* detach */
	nodev,			/* reset */
	&inotify_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};
1476
/*
 * Module linkage for this pseudo driver.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"inotify support",	/* name of module */
	&inotify_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
1488
1489 int
1490 _init(void)
1491 {
1492 return (mod_install(&modlinkage));
1493 }
1494
1495 int
1496 _info(struct modinfo *modinfop)
1497 {
1498 return (mod_info(&modlinkage, modinfop));
1499 }
1500
1501 int
1502 _fini(void)
1503 {
1504 return (mod_remove(&modlinkage));
1505 }