Print this page
Reduce lint
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/inotify.c
+++ new/usr/src/uts/common/io/inotify.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 14 * Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
15 15 */
16 16
17 17 /*
18 18 * Support for the inotify facility, a Linux-borne facility for asynchronous
19 19 * notification of certain events on specified files or directories. Our
20 20 * implementation broadly leverages the file event monitoring facility, and
21 21 * would actually be quite straightforward were it not for a very serious
22 22 * blunder in the inotify interface: in addition to allowing for one to be
23 23 * notified on events on a particular file or directory, inotify also allows
24 24 * for one to be notified on certain events on files _within_ a watched
25 25 * directory -- even though those events have absolutely nothing to do with
26 26 * the directory itself. This leads to all sorts of madness because file
27 27 * operations are (of course) not undertaken on paths but rather on open
28 28 * files -- and the relationships between open files and the paths that resolve
29 29 * to those files are neither static nor isomorphic. We implement this
30 30 * concept by having _child watches_ when directories are watched with events
31 31 * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
32 32 * first added, and we modify those child watches dynamically as files are
33 33 * created, deleted, moved into or moved out of the specified directory. This
34 34 * mechanism works well, absent hard links. Hard links, unfortunately, break
35 35 * this rather badly, and the user is warned that watches on directories that
36 36 * have multiple directory entries referring to the same file may behave
37 37 * unexpectedly.
38 38 */
39 39
40 40 #include <sys/ddi.h>
41 41 #include <sys/sunddi.h>
42 42 #include <sys/inotify.h>
43 43 #include <sys/fem.h>
44 44 #include <sys/conf.h>
45 45 #include <sys/stat.h>
46 46 #include <sys/vfs_opreg.h>
47 47 #include <sys/vmem.h>
48 48 #include <sys/avl.h>
49 49 #include <sys/sysmacros.h>
50 50 #include <sys/cyclic.h>
51 51 #include <sys/filio.h>
52 52
53 53 struct inotify_state;
54 54 struct inotify_kevent;
55 55
56 56 typedef struct inotify_watch inotify_watch_t;
57 57 typedef struct inotify_state inotify_state_t;
58 58 typedef struct inotify_kevent inotify_kevent_t;
59 59
/*
 * An inotify watch on a single vnode.  A top-level watch carries a watch
 * descriptor (inw_wd) and an AVL tree of child watches (inw_children); a
 * child watch instead carries its parent (inw_parent) and its name within
 * that parent (inw_name).  The reference count is protected by inw_lock;
 * all other fields are protected by the owning state's ins_lock.
 */
struct inotify_watch {
	kmutex_t inw_lock;			/* lock protecting ref count */
	int inw_refcnt;				/* reference count */
	uint8_t inw_zombie:1;			/* boolean: is zombie */
	uint8_t inw_fired:1;			/* boolean: fired one-shot */
	uint8_t inw_active:1;			/* boolean: watch is active */
	uint8_t inw_orphaned:1;			/* boolean: orphaned */
	kcondvar_t inw_cv;			/* condvar for zombifier */
	uint32_t inw_mask;			/* mask of watch */
	int32_t inw_wd;				/* watch descriptor */
	vnode_t *inw_vp;			/* underlying vnode */
	inotify_watch_t *inw_parent;		/* parent, if a child */
	avl_node_t inw_byvp;			/* watches by vnode */
	avl_node_t inw_bywd;			/* watches by descriptor */
	avl_tree_t inw_children;		/* children, if a parent */
	char *inw_name;				/* name, if a child */
	list_node_t inw_orphan;			/* orphan list */
	cred_t *inw_cred;			/* cred, if orphaned */
	inotify_state_t *inw_state;		/* corresponding state */
};
80 80
/*
 * A single event on an instance's event queue.  The embedded
 * inotify_event is variably sized:  its name, if any, is allocated
 * immediately following the structure.
 */
struct inotify_kevent {
	inotify_kevent_t *ine_next;		/* next event in queue */
	struct inotify_event ine_event;		/* event (variable size) */
};
85 85
/* Total allocation size of an inotify_kevent_t, including its name. */
#define	INOTIFY_EVENT_LENGTH(ev) \
	(sizeof (inotify_kevent_t) + (ev)->ine_event.len)
88 88
/*
 * Per-instance state for an open of the inotify device, protected by
 * ins_lock.  Events are kept on a singly-linked queue running from
 * ins_head to ins_tail; readers block on ins_cv and pollers on
 * ins_pollhd.
 */
struct inotify_state {
	kmutex_t ins_lock;			/* lock protecting state */
	avl_tree_t ins_byvp;			/* watches by vnode */
	avl_tree_t ins_bywd;			/* watches by descriptor */
	vmem_t *ins_wds;			/* watch identifier arena */
	int ins_maxwatches;			/* maximum number of watches */
	int ins_maxevents;			/* maximum number of events */
	int ins_nevents;			/* current # of events */
	int32_t ins_size;			/* total size of events */
	inotify_kevent_t *ins_head;		/* head of event queue */
	inotify_kevent_t *ins_tail;		/* tail of event queue */
	pollhead_t ins_pollhd;			/* poll head */
	kcondvar_t ins_cv;			/* condvar for reading */
	list_t ins_orphans;			/* orphan list */
	ddi_periodic_t ins_cleaner;		/* cyclic for cleaning */
	inotify_watch_t *ins_zombies;		/* zombie watch list */
	cred_t *ins_cred;			/* creator's credentials */
	inotify_state_t *ins_next;		/* next state on global list */
};
108 108
109 109 /*
110 110 * Tunables (exported read-only in lx-branded zones via /proc).
111 111 */
112 112 int inotify_maxwatches = 8192; /* max watches per instance */
113 113 int inotify_maxevents = 16384; /* max events */
114 114 int inotify_maxinstances = 128; /* max instances per user */
115 115
116 116 /*
117 117 * Internal global variables.
118 118 */
119 119 static kmutex_t inotify_lock; /* lock protecting state */
120 120 static dev_info_t *inotify_devi; /* device info */
121 121 static fem_t *inotify_femp; /* FEM pointer */
122 122 static vmem_t *inotify_minor; /* minor number arena */
123 123 static void *inotify_softstate; /* softstate pointer */
static inotify_state_t	*inotify_state;		/* global list of state */
125 125
126 126 static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
127 127 static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
128 128 static void inotify_watch_delete(inotify_watch_t *, uint32_t);
129 129 static void inotify_watch_remove(inotify_state_t *state,
130 130 inotify_watch_t *watch);
131 131
132 132 static int
133 133 inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
134 134 cred_t *cr, caller_context_t *ct)
135 135 {
136 136 inotify_watch_t *watch = vf->fa_fnode->fn_available;
137 137 int rval;
138 138
139 139 if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
140 140 inotify_watch_event(watch, flag & FWRITE ?
141 141 IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
142 142 }
143 143
144 144 return (rval);
145 145 }
146 146
147 147 static int
148 148 inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
149 149 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
150 150 vsecattr_t *vsecp)
151 151 {
152 152 inotify_watch_t *watch = vf->fa_fnode->fn_available;
153 153 int rval;
154 154
155 155 if ((rval = vnext_create(vf, name, vap, excl, mode,
156 156 vpp, cr, flag, ct, vsecp)) == 0) {
157 157 inotify_watch_insert(watch, *vpp, name);
158 158 inotify_watch_event(watch, IN_CREATE, name);
159 159 }
160 160
161 161 return (rval);
162 162 }
163 163
164 164 static int
165 165 inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
166 166 caller_context_t *ct, int flags)
167 167 {
168 168 inotify_watch_t *watch = vf->fa_fnode->fn_available;
169 169 int rval;
170 170
171 171 if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
172 172 inotify_watch_insert(watch, svp, tnm);
173 173 inotify_watch_event(watch, IN_CREATE, tnm);
174 174 }
175 175
176 176 return (rval);
177 177 }
178 178
179 179 static int
180 180 inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
181 181 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
182 182 {
183 183 inotify_watch_t *watch = vf->fa_fnode->fn_available;
184 184 int rval;
185 185
186 186 if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
187 187 ct, flags, vsecp)) == 0) {
188 188 inotify_watch_insert(watch, *vpp, name);
189 189 inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
190 190 }
191 191
192 192 return (rval);
193 193 }
194 194
195 195 static int
196 196 inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
197 197 {
198 198 inotify_watch_t *watch = vf->fa_fnode->fn_available;
199 199 int rval;
200 200
201 201 if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
202 202 inotify_watch_event(watch, IN_OPEN, NULL);
203 203
204 204 return (rval);
205 205 }
206 206
207 207 static int
208 208 inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
209 209 caller_context_t *ct)
210 210 {
211 211 inotify_watch_t *watch = vf->fa_fnode->fn_available;
212 212 int rval = vnext_read(vf, uiop, ioflag, cr, ct);
213 213 inotify_watch_event(watch, IN_ACCESS, NULL);
214 214
215 215 return (rval);
216 216 }
217 217
218 218 static int
219 219 inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
220 220 caller_context_t *ct, int flags)
221 221 {
222 222 inotify_watch_t *watch = vf->fa_fnode->fn_available;
223 223 int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
224 224 inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
225 225
226 226 return (rval);
227 227 }
228 228
229 229 int
230 230 inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
231 231 int flags)
232 232 {
233 233 inotify_watch_t *watch = vf->fa_fnode->fn_available;
234 234 int rval;
235 235
236 236 if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
237 237 inotify_watch_event(watch, IN_DELETE, nm);
238 238
239 239 return (rval);
240 240 }
241 241
242 242 int
243 243 inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
244 244 caller_context_t *ct, int flags)
245 245 {
246 246 inotify_watch_t *watch = vf->fa_fnode->fn_available;
247 247 int rval;
248 248
249 249 if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
250 250 inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
251 251
252 252 return (rval);
253 253 }
254 254
255 255 static int
256 256 inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
257 257 caller_context_t *ct)
258 258 {
259 259 inotify_watch_t *watch = vf->fa_fnode->fn_available;
260 260 int rval;
261 261
262 262 if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
263 263 inotify_watch_event(watch, IN_ATTRIB, NULL);
264 264
265 265 return (rval);
266 266 }
267 267
268 268 static int
269 269 inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
270 270 caller_context_t *ct)
271 271 {
272 272 inotify_watch_t *watch = vf->fa_fnode->fn_available;
273 273 int rval = vnext_write(vf, uiop, ioflag, cr, ct);
274 274 inotify_watch_event(watch, IN_MODIFY, NULL);
275 275
276 276 return (rval);
277 277 }
278 278
/*
 * FEM monitor for VOP_VNEVENT:  translate vnode events (rename, remove,
 * rmdir, create, link, etc.) on a watched file or directory into their
 * inotify equivalents.  The event is always passed down the FEM stack
 * via vnext_vnevent() after processing.
 */
static int
inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	inotify_watch_t *watch = vf->fa_fnode->fn_available;

	switch (vnevent) {
	case VE_RENAME_SRC:
		inotify_watch_event(watch, IN_MOVE_SELF, NULL);
		inotify_watch_delete(watch, IN_MOVE_SELF);
		break;
	case VE_REMOVE:
		/*
		 * Linux will apparently fire an IN_ATTRIB event when the link
		 * count changes (including when it drops to 0 on a remove).
		 * This is merely somewhat odd; what is amazing is that this
		 * IN_ATTRIB event is not visible on an inotify watch on the
		 * parent directory.  (IN_ATTRIB events are normally sent to
		 * watches on the parent directory).  While it's hard to
		 * believe that this constitutes desired semantics, ltp
		 * unfortunately tests this case (if implicitly); in the name
		 * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
		 * explicitly watching the file that has been removed.
		 */
		if (watch->inw_parent == NULL)
			inotify_watch_event(watch, IN_ATTRIB, NULL);

		/*FALLTHROUGH*/
	case VE_RENAME_DEST:
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_RMDIR:
		/*
		 * It seems that IN_ISDIR should really be OR'd in here, but
		 * Linux doesn't seem to do that in this case; for the sake of
		 * bug-for-bug compatibility, we don't do it either.
		 */
		inotify_watch_event(watch, IN_DELETE_SELF, NULL);
		inotify_watch_delete(watch, IN_DELETE_SELF);
		break;
	case VE_CREATE:
	case VE_TRUNCATE:
	case VE_RESIZE:
		inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
		break;
	case VE_LINK:
		inotify_watch_event(watch, IN_ATTRIB, NULL);
		break;
	case VE_RENAME_SRC_DIR:
		inotify_watch_event(watch, IN_MOVED_FROM, name);
		break;
	case VE_RENAME_DEST_DIR:
		/* Fall back to the vnode's cached path if no name is given. */
		if (name == NULL)
			name = dvp->v_path;

		inotify_watch_insert(watch, dvp, name);
		inotify_watch_event(watch, IN_MOVED_TO, name);
		break;
	case VE_SUPPORT:
	case VE_MOUNTEDOVER:
	case VE_PRE_RENAME_SRC:
	case VE_PRE_RENAME_DEST:
	case VE_PRE_RENAME_DEST_DIR:
		break;
	}

	return (vnext_vnevent(vf, vnevent, dvp, name, ct));
}
348 348
/*
 * FEM template mapping the vnode operations we interpose upon to the
 * inotify monitors above; installed on watched vnodes via fem_install().
 */
const fs_operation_def_t inotify_vnodesrc_template[] = {
	VOPNAME_CLOSE,		{ .femop_close = inotify_fop_close },
	VOPNAME_CREATE,		{ .femop_create = inotify_fop_create },
	VOPNAME_LINK,		{ .femop_link = inotify_fop_link },
	VOPNAME_MKDIR,		{ .femop_mkdir = inotify_fop_mkdir },
	VOPNAME_OPEN,		{ .femop_open = inotify_fop_open },
	VOPNAME_READ,		{ .femop_read = inotify_fop_read },
	VOPNAME_READDIR,	{ .femop_readdir = inotify_fop_readdir },
	VOPNAME_REMOVE,		{ .femop_remove = inotify_fop_remove },
	VOPNAME_RMDIR,		{ .femop_rmdir = inotify_fop_rmdir },
	VOPNAME_SETATTR,	{ .femop_setattr = inotify_fop_setattr },
	VOPNAME_WRITE,		{ .femop_write = inotify_fop_write },
	VOPNAME_VNEVENT,	{ .femop_vnevent = inotify_fop_vnevent },
	NULL, NULL
};
364 364
365 365 static int
366 366 inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
367 367 {
368 368 if (lhs->inw_wd < rhs->inw_wd)
369 369 return (-1);
370 370
371 371 if (lhs->inw_wd > rhs->inw_wd)
372 372 return (1);
373 373
374 374 return (0);
375 375 }
376 376
377 377 static int
378 378 inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
379 379 {
380 380 uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
381 381
382 382 if (lvp < rvp)
383 383 return (-1);
384 384
385 385 if (lvp > rvp)
386 386 return (1);
387 387
388 388 return (0);
389 389 }
390 390
/*
 * Place an additional hold on a watch.  The caller must already have a
 * reference (the count must be strictly positive on entry).
 */
static void
inotify_watch_hold(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 0);
	watch->inw_refcnt++;
	mutex_exit(&watch->inw_lock);
}
399 399
/*
 * Release a hold on a watch.  The released hold must not be the last
 * reference (the structural reference is dropped elsewhere); if the
 * count drops to 1 on a zombified watch, anyone waiting on inw_cv is
 * signalled so the watch can be reaped.
 */
static void
inotify_watch_release(inotify_watch_t *watch)
{
	mutex_enter(&watch->inw_lock);
	VERIFY(watch->inw_refcnt > 1);

	if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
		/*
		 * We're down to our last reference; kick anyone that might be
		 * waiting.
		 */
		cv_signal(&watch->inw_cv);
	}

	mutex_exit(&watch->inw_lock);
}
416 416
417 417 static void
418 418 inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
419 419 {
420 420 inotify_kevent_t *event, *tail;
421 421 inotify_state_t *state = watch->inw_state;
422 422 uint32_t wd = watch->inw_wd, cookie = 0, len;
423 423 boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
424 424 inotify_watch_t *source = watch;
425 425
426 426 if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
427 427 return;
428 428
429 429 if (watch->inw_parent != NULL) {
430 430 /*
431 431 * This is an event on the child; if this isn't a valid child
432 432 * event, return. Otherwise, we move our watch to be our
433 433 * parent (which we know is around because we have a hold on
434 434 * it) and continue.
435 435 */
436 436 if (!(mask & IN_CHILD_EVENTS))
437 437 return;
438 438
439 439 name = watch->inw_name;
440 440 watch = watch->inw_parent;
441 441 wd = watch->inw_wd;
442 442 }
443 443
444 444 if (!removal) {
445 445 mutex_enter(&state->ins_lock);
446 446
447 447 if (watch->inw_zombie ||
448 448 watch->inw_fired || !watch->inw_active) {
449 449 mutex_exit(&state->ins_lock);
450 450 return;
451 451 }
452 452 } else {
453 453 if (!watch->inw_active)
454 454 return;
455 455
456 456 VERIFY(MUTEX_HELD(&state->ins_lock));
457 457 }
458 458
459 459 /*
460 460 * If this is an operation on a directory and it's a child event
461 461 * (event if it's not on a child), we specify IN_ISDIR.
462 462 */
463 463 if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
464 464 mask |= IN_ISDIR;
465 465
466 466 if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
467 467 cookie = (uint32_t)curthread->t_did;
468 468
469 469 if (state->ins_nevents >= state->ins_maxevents) {
470 470 /*
471 471 * We're at our maximum number of events -- turn our event
472 472 * into an IN_Q_OVERFLOW event, which will be coalesced if
473 473 * it's already the tail event.
474 474 */
475 475 mask = IN_Q_OVERFLOW;
476 476 wd = (uint32_t)-1;
477 477 cookie = 0;
478 478 len = 0;
479 479 }
480 480
481 481 if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
482 482 tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
483 483 ((tail->ine_event.len == 0 && len == 0) ||
484 484 (name != NULL && tail->ine_event.len != 0 &&
485 485 strcmp(tail->ine_event.name, name) == 0))) {
486 486 /*
487 487 * This is an implicitly coalesced event; we're done.
488 488 */
489 489 if (!removal)
490 490 mutex_exit(&state->ins_lock);
491 491 return;
492 492 }
493 493
494 494 if (name != NULL) {
495 495 len = strlen(name) + 1;
496 496 len = roundup(len, sizeof (struct inotify_event));
497 497 } else {
|
↓ open down ↓ |
497 lines elided |
↑ open up ↑ |
498 498 len = 0;
499 499 }
500 500
501 501 event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
502 502 event->ine_event.wd = wd;
503 503 event->ine_event.mask = (uint32_t)mask;
504 504 event->ine_event.cookie = cookie;
505 505 event->ine_event.len = len;
506 506
507 507 if (name != NULL)
508 - strcpy(event->ine_event.name, name);
508 + (void) strcpy(event->ine_event.name, name);
509 509
510 510 if (tail != NULL) {
511 511 tail->ine_next = event;
512 512 } else {
513 513 VERIFY(state->ins_head == NULL);
514 514 state->ins_head = event;
515 515 cv_broadcast(&state->ins_cv);
516 516 }
517 517
518 518 state->ins_tail = event;
519 519 state->ins_nevents++;
520 520 state->ins_size += sizeof (event->ine_event) + len;
521 521
522 522 if (removal)
523 523 return;
524 524
525 525 if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
526 526 /*
527 527 * If this is a one-shot, we need to remove the watch. (Note
528 528 * that this will recurse back into inotify_watch_event() to
529 529 * fire the IN_IGNORED event -- but with "removal" set.)
530 530 */
531 531 watch->inw_fired = 1;
532 532 inotify_watch_remove(state, watch);
533 533 }
534 534
535 535 mutex_exit(&state->ins_lock);
536 536 pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
537 537 }
538 538
/*
 * Destroy a watch.  By the time we're in here, the watch must have exactly
 * one reference.  Note that the watch is freed with inw_lock still held;
 * the caller must not touch the watch after this returns.
 */
static void
inotify_watch_destroy(inotify_watch_t *watch)
{
	VERIFY(MUTEX_HELD(&watch->inw_lock));

	/* inw_name is only non-NULL for child watches. */
	if (watch->inw_name != NULL)
		kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);

	kmem_free(watch, sizeof (inotify_watch_t));
}
553 553
/*
 * Zombify a watch.  By the time we come in here, it must be true that the
 * watch has already been fem_uninstall()'d -- the only reference should be
 * in the state's data structure.  If we can get away with freeing it, we'll
 * do that -- but if the reference count is greater than one due to an active
 * vnode operation, we'll put this watch on the zombie list on the state
 * structure.
 */
static void
inotify_watch_zombify(inotify_watch_t *watch)
{
	inotify_state_t *state = watch->inw_state;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(!watch->inw_zombie);

	watch->inw_zombie = 1;

	if (watch->inw_parent != NULL) {
		/* A child watch: drop the hold it placed on its parent. */
		inotify_watch_release(watch->inw_parent);
	} else {
		/* A top-level watch: unhook it and return its descriptor. */
		avl_remove(&state->ins_byvp, watch);
		avl_remove(&state->ins_bywd, watch);
		vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
		watch->inw_wd = -1;
	}

	mutex_enter(&watch->inw_lock);

	if (watch->inw_refcnt == 1) {
		/*
		 * There are no operations in flight and there is no way
		 * for anyone to discover this watch -- we can destroy it.
		 * (inotify_watch_destroy() frees the watch with inw_lock
		 * held, so there is no mutex_exit() on this path.)
		 */
		inotify_watch_destroy(watch);
	} else {
		/*
		 * There are operations in flight; we will need to enqueue
		 * this for later destruction.  Note that inw_parent is
		 * reused here as the zombie list linkage.
		 */
		watch->inw_parent = state->ins_zombies;
		state->ins_zombies = watch;
		mutex_exit(&watch->inw_lock);
	}
}
599 599
600 600 static inotify_watch_t *
601 601 inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
602 602 const char *name, vnode_t *vp, uint32_t mask)
603 603 {
604 604 inotify_watch_t *watch;
605 605 int err;
606 606
607 607 VERIFY(MUTEX_HELD(&state->ins_lock));
608 608
609 609 watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
610 610
611 611 watch->inw_vp = vp;
612 612 watch->inw_mask = mask;
613 613 watch->inw_state = state;
614 614 watch->inw_refcnt = 1;
615 615
616 616 if (parent == NULL) {
617 617 watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
618 618 1, VM_BESTFIT | VM_SLEEP);
619 619 avl_add(&state->ins_byvp, watch);
620 620 avl_add(&state->ins_bywd, watch);
621 621
622 622 avl_create(&watch->inw_children,
623 623 (int(*)(const void *, const void *))inotify_watch_cmpvp,
624 624 sizeof (inotify_watch_t),
625 625 offsetof(inotify_watch_t, inw_byvp));
626 626 } else {
627 627 VERIFY(name != NULL);
628 628 inotify_watch_hold(parent);
629 629 watch->inw_mask &= IN_CHILD_EVENTS;
630 630 watch->inw_parent = parent;
631 631 watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
632 632 strcpy(watch->inw_name, name);
633 633
634 634 avl_add(&parent->inw_children, watch);
635 635 }
636 636
637 637 /*
638 638 * Add our monitor to the vnode. We must not have the watch lock held
639 639 * when we do this, as it will immediately hold our watch.
640 640 */
641 641 err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
642 642 (void (*)(void *))inotify_watch_hold,
643 643 (void (*)(void *))inotify_watch_release);
644 644
645 645 VERIFY(err == 0);
646 646
647 647 return (watch);
648 648 }
649 649
/*
 * Remove a (non-child) watch.  This is called from either synchronous context
 * via inotify_rm_watch() or monitor context via either a vnevent or a
 * one-shot.  The caller must hold the state lock; the watch and all of its
 * children are uninstalled and zombified.
 */
static void
inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
{
	inotify_watch_t *child;
	int err;

	VERIFY(MUTEX_HELD(&state->ins_lock));
	VERIFY(watch->inw_parent == NULL);

	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	/*
	 * If we have children, we're going to remove them all and set them
	 * all to be zombies.
	 */
	while ((child = avl_first(&watch->inw_children)) != NULL) {
		VERIFY(child->inw_parent == watch);
		avl_remove(&watch->inw_children, child);

		err = fem_uninstall(child->inw_vp, inotify_femp, child);
		VERIFY(err == 0);

		/*
		 * If this child watch has been orphaned, remove it from the
		 * state's list of orphans.
		 */
		if (child->inw_orphaned) {
			list_remove(&state->ins_orphans, child);
			crfree(child->inw_cred);
		}

		VN_RELE(child->inw_vp);

		/*
		 * We're down (or should be down) to a single reference to
		 * this child watch; it's safe to zombify it.
		 */
		inotify_watch_zombify(child);
	}

	/*
	 * IN_REMOVAL tells inotify_watch_event() that we already hold the
	 * state lock.
	 */
	inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- we know that the only reference
	 * can come from operations in flight.
	 */
	inotify_watch_zombify(watch);
}
705 705
/*
 * Delete a watch.  Should only be called from VOP context, with "event"
 * indicating why the deletion is occurring (e.g. IN_DELETE_SELF or
 * IN_MOVE_SELF).
 */
static void
inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
{
	inotify_state_t *state = watch->inw_state;
	inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
	int err;

	/* Only removals matter unless child events were requested. */
	if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
		return;

	mutex_enter(&state->ins_lock);

	if (watch->inw_zombie) {
		mutex_exit(&state->ins_lock);
		return;
	}

	if ((parent = watch->inw_parent) == NULL) {
		if (event == IN_DELETE_SELF) {
			/*
			 * If we're here because we're being deleted and we
			 * are not a child watch, we need to delete the entire
			 * watch, children and all.
			 */
			inotify_watch_remove(state, watch);
		}

		mutex_exit(&state->ins_lock);
		return;
	} else {
		if (event == IN_DELETE_SELF &&
		    !(parent->inw_mask & IN_EXCL_UNLINK)) {
			/*
			 * This is a child watch for a file that is being
			 * removed and IN_EXCL_UNLINK has not been specified;
			 * indicate that it is orphaned and add it to the list
			 * of orphans.  (This list will be checked by the
			 * cleaning cyclic to determine when the watch has
			 * become the only hold on the vnode, at which point
			 * the watch can be zombified.)  Note that we check
			 * if the watch is orphaned before we orphan it:  hard
			 * links make it possible for VE_REMOVE to be called
			 * multiple times on the same vnode. (!)
			 */
			if (!watch->inw_orphaned) {
				watch->inw_orphaned = 1;
				watch->inw_cred = CRED();
				crhold(watch->inw_cred);
				list_insert_head(&state->ins_orphans, watch);
			}

			mutex_exit(&state->ins_lock);
			return;
		}

		if (watch->inw_orphaned) {
			/*
			 * If we're here, a file was orphaned and then later
			 * moved -- which almost certainly means that hard
			 * links are on the scene.  We choose the orphan over
			 * the move because we don't want to spuriously
			 * drop events if we can avoid it.
			 */
			crfree(watch->inw_cred);
			list_remove(&state->ins_orphans, watch);
		}
	}

	if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
		/*
		 * This watch has already been deleted from the parent.
		 */
		mutex_exit(&state->ins_lock);
		return;
	}

	avl_remove(&parent->inw_children, watch);
	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
	VERIFY(err == 0);

	VN_RELE(watch->inw_vp);

	/*
	 * It's now safe to zombify the watch -- which won't actually delete
	 * it as we know that the reference count is greater than 1.
	 */
	inotify_watch_zombify(watch);
	mutex_exit(&state->ins_lock);
}
798 798
/*
 * Insert a new child watch.  Should only be called from VOP context when
 * a child is created in a watched directory.  This is a no-op unless the
 * parent watch asked for child events, and is idempotent with respect to
 * the child vnode.
 */
static void
inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
{
	inotify_state_t *state = watch->inw_state;
	inotify_watch_t cmp = { .inw_vp = vp };

	if (!(watch->inw_mask & IN_CHILD_EVENTS))
		return;

	mutex_enter(&state->ins_lock);

	/* Only live, top-level watches with a known child vnode qualify. */
	if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
		mutex_exit(&state->ins_lock);
		return;
	}

	/* Already watching this child -- nothing to do. */
	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		return;
	}

	VN_HOLD(vp);
	watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
	VERIFY(watch != NULL);

	mutex_exit(&state->ins_lock);
}
830 830
831 831
/*
 * Add or update a (non-child) watch on the given vnode, returning the
 * watch descriptor in *wdp.  Returns ENOSPC if the instance is already at
 * its watch limit.  Absent IN_MASK_ADD, an existing watch's mask is
 * replaced rather than augmented.
 */
static int
inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
    int32_t *wdp)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	uint32_t set;

	set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;

	/*
	 * Lookup our vnode to determine if we already have a watch on it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * We don't have this watch; allocate a new one, provided that
		 * we have fewer than our limit.
		 */
		if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
			mutex_exit(&state->ins_lock);
			return (ENOSPC);
		}

		VN_HOLD(vp);
		watch = inotify_watch_add(state, NULL, NULL, vp, set);
		*wdp = watch->inw_wd;
		mutex_exit(&state->ins_lock);

		return (0);
	}

	VERIFY(!watch->inw_zombie);

	if (!(mask & IN_MASK_ADD)) {
		/*
		 * Note that if we're resetting our event mask and we're
		 * transitioning from an event mask that includes child events
		 * to one that doesn't, there will be potentially some stale
		 * child watches.  This is basically fine:  they won't fire,
		 * and they will correctly be removed when the watch is
		 * removed.
		 */
		watch->inw_mask = 0;
	}

	watch->inw_mask |= set;

	*wdp = watch->inw_wd;

	mutex_exit(&state->ins_lock);

	return (0);
}
886 886
/*
 * Add a child watch on the named entry within a watched directory,
 * resolving the child vnode via lookupnameat().  Idempotent:  if a child
 * watch for the resulting vnode already exists, this is a no-op.
 */
static int
inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
{
	inotify_watch_t *watch, cmp = { .inw_vp = vp };
	vnode_t *cvp;
	int err;

	/*
	 * Verify that the specified child doesn't have a directory component
	 * within it.
	 */
	if (strchr(name, '/') != NULL)
		return (EINVAL);

	/*
	 * Lookup the underlying file.  Note that this will succeed even if
	 * we don't have permissions to actually read the file.
	 */
	if ((err = lookupnameat(name,
	    UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
		return (err);
	}

	/*
	 * Use our vnode to find our watch, and then add our child watch to it.
	 */
	mutex_enter(&state->ins_lock);

	if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
		/*
		 * This is unexpected -- it means that we don't have the
		 * watch that we thought we had.
		 */
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (ENXIO);
	}

	/*
	 * Now lookup the child vnode in the watch; we'll only add it if it
	 * isn't already there.
	 */
	cmp.inw_vp = cvp;

	if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
		mutex_exit(&state->ins_lock);
		VN_RELE(cvp);
		return (0);
	}

	watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
	VERIFY(watch != NULL);
	mutex_exit(&state->ins_lock);

	return (0);
}
943 943
944 944 static int
945 945 inotify_rm_watch(inotify_state_t *state, int32_t wd)
946 946 {
947 947 inotify_watch_t *watch, cmp = { .inw_wd = wd };
948 948
949 949 mutex_enter(&state->ins_lock);
950 950
951 951 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
952 952 mutex_exit(&state->ins_lock);
953 953 return (EINVAL);
954 954 }
955 955
956 956 inotify_watch_remove(state, watch);
957 957 mutex_exit(&state->ins_lock);
958 958
959 959 return (0);
960 960 }
961 961
962 962 static int
963 963 inotify_activate(inotify_state_t *state, int32_t wd)
964 964 {
965 965 inotify_watch_t *watch, cmp = { .inw_wd = wd };
966 966
967 967 mutex_enter(&state->ins_lock);
968 968
969 969 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
970 970 mutex_exit(&state->ins_lock);
971 971 return (EINVAL);
972 972 }
973 973
974 974 watch->inw_active = 1;
975 975
976 976 mutex_exit(&state->ins_lock);
977 977
978 978 return (0);
979 979 }
980 980
/*
 * Called periodically as a cyclic to process the orphans and zombies.
 *
 * Orphans are watches whose watched file has been removed from its parent
 * directory but whose vnode is still held elsewhere; once the vnode's hold
 * count drains to our own reference, the watch can be torn down and
 * zombified.  Zombies are watches awaiting their final reference release;
 * once only our reference remains, they can be destroyed.
 */
static void
inotify_clean(void *arg)
{
	inotify_state_t *state = arg;
	inotify_watch_t *watch, *parent, *next, **prev;
	cred_t *savecred;
	int err;

	mutex_enter(&state->ins_lock);

	/*
	 * Pass 1: sweep the orphan list.
	 */
	for (watch = list_head(&state->ins_orphans);
	    watch != NULL; watch = next) {
		next = list_next(&state->ins_orphans, watch);

		VERIFY(!watch->inw_zombie);
		VERIFY((parent = watch->inw_parent) != NULL);

		/*
		 * Someone other than us still holds the vnode; leave the
		 * orphan in place and retry on a later cycle.
		 */
		if (watch->inw_vp->v_count > 1)
			continue;

		avl_remove(&parent->inw_children, watch);
		err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
		VERIFY(err == 0);

		list_remove(&state->ins_orphans, watch);

		/*
		 * For purposes of releasing the vnode, we need to switch our
		 * cred to be the cred of the orphaning thread (which we held
		 * at the time this watch was orphaned).
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = watch->inw_cred;
		VN_RELE(watch->inw_vp);
		crfree(watch->inw_cred);
		curthread->t_cred = savecred;

		inotify_watch_zombify(watch);
	}

	/*
	 * Pass 2: sweep the zombie list.  The zombie list is singly linked
	 * through inw_parent (see also inotify_close(), which walks the list
	 * the same way); prev points at the link to the current element so
	 * that destroyed entries can be unlinked in place.
	 */
	prev = &state->ins_zombies;

	while ((watch = *prev) != NULL) {
		mutex_enter(&watch->inw_lock);

		if (watch->inw_refcnt == 1) {
			/*
			 * Only our reference remains; unlink and destroy.
			 * NOTE(review): inotify_watch_destroy() presumably
			 * disposes of inw_lock (no mutex_exit here) --
			 * confirm against its definition.
			 */
			*prev = watch->inw_parent;
			inotify_watch_destroy(watch);
			continue;
		}

		prev = &watch->inw_parent;
		mutex_exit(&watch->inw_lock);
	}

	mutex_exit(&state->ins_lock);
}
1041 1041
1042 1042 /*ARGSUSED*/
1043 1043 static int
1044 1044 inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
1045 1045 {
1046 1046 inotify_state_t *state;
1047 1047 major_t major = getemajor(*devp);
1048 1048 minor_t minor = getminor(*devp);
1049 1049 int instances = 0;
1050 1050 char c[64];
1051 1051
1052 1052 if (minor != INOTIFYMNRN_INOTIFY)
1053 1053 return (ENXIO);
1054 1054
1055 1055 mutex_enter(&inotify_lock);
1056 1056
1057 1057 for (state = inotify_state; state != NULL; state = state->ins_next) {
1058 1058 if (state->ins_cred == cred_p)
1059 1059 instances++;
1060 1060 }
1061 1061
1062 1062 if (instances >= inotify_maxinstances) {
1063 1063 mutex_exit(&inotify_lock);
1064 1064 return (EMFILE);
1065 1065 }
1066 1066
1067 1067 minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
1068 1068 VM_BESTFIT | VM_SLEEP);
1069 1069
1070 1070 if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
1071 1071 vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1072 1072 mutex_exit(&inotify_lock);
1073 1073 return (NULL);
1074 1074 }
1075 1075
1076 1076 state = ddi_get_soft_state(inotify_softstate, minor);
1077 1077 *devp = makedevice(major, minor);
1078 1078
1079 1079 crhold(cred_p);
1080 1080 state->ins_cred = cred_p;
1081 1081 state->ins_next = inotify_state;
1082 1082 inotify_state = state;
1083 1083
1084 1084 (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
1085 1085 state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
1086 1086 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
1087 1087
1088 1088 avl_create(&state->ins_bywd,
1089 1089 (int(*)(const void *, const void *))inotify_watch_cmpwd,
1090 1090 sizeof (inotify_watch_t),
1091 1091 offsetof(inotify_watch_t, inw_bywd));
1092 1092
1093 1093 avl_create(&state->ins_byvp,
1094 1094 (int(*)(const void *, const void *))inotify_watch_cmpvp,
1095 1095 sizeof (inotify_watch_t),
1096 1096 offsetof(inotify_watch_t, inw_byvp));
1097 1097
1098 1098 list_create(&state->ins_orphans, sizeof (inotify_watch_t),
1099 1099 offsetof(inotify_watch_t, inw_orphan));
1100 1100
1101 1101 state->ins_maxwatches = inotify_maxwatches;
1102 1102 state->ins_maxevents = inotify_maxevents;
1103 1103
1104 1104 mutex_exit(&inotify_lock);
1105 1105
1106 1106 state->ins_cleaner = ddi_periodic_add(inotify_clean,
1107 1107 state, NANOSEC, DDI_IPL_0);
1108 1108
1109 1109 return (0);
1110 1110 }
1111 1111
/*
 * read(9E) entry point: copy out as many queued events as fit in the
 * caller's buffer.  Blocks until at least one event is available unless the
 * descriptor is in non-blocking mode (EAGAIN).  If the buffer cannot hold
 * even the first event, EINVAL is returned (matching Linux inotify
 * semantics for a too-small read).
 */
/*ARGSUSED*/
static int
inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	inotify_state_t *state;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);
	int err = 0, nevents = 0;
	size_t len;

	state = ddi_get_soft_state(inotify_softstate, minor);

	mutex_enter(&state->ins_lock);

	while (state->ins_head == NULL) {
		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
			mutex_exit(&state->ins_lock);
			return (EAGAIN);
		}

		/* Interrupted by a signal while waiting for events. */
		if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
			mutex_exit(&state->ins_lock);
			return (EINTR);
		}
	}

	/*
	 * We have events and we have our lock; return as many as we can.
	 */
	while ((event = state->ins_head) != NULL) {
		/* Total size: fixed event header plus variable-length name. */
		len = sizeof (event->ine_event) + event->ine_event.len;

		if (uio->uio_resid < len) {
			/*
			 * EINVAL only if we could not return a single event;
			 * otherwise return the events copied so far.
			 */
			if (nevents == 0)
				err = EINVAL;
			break;
		}

		nevents++;

		/*
		 * On uiomove() failure the event is deliberately left at the
		 * head of the queue (we break before dequeueing it).
		 */
		if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
			break;

		VERIFY(state->ins_nevents > 0);
		state->ins_nevents--;

		VERIFY(state->ins_size > 0);
		state->ins_size -= len;

		/* Dequeue; if the queue drained, the tail must match. */
		if ((state->ins_head = event->ine_next) == NULL) {
			VERIFY(event == state->ins_tail);
			VERIFY(state->ins_nevents == 0);
			state->ins_tail = NULL;
		}

		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	mutex_exit(&state->ins_lock);

	return (err);
}
1174 1174
1175 1175 /*ARGSUSED*/
1176 1176 static int
1177 1177 inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
1178 1178 struct pollhead **phpp)
1179 1179 {
1180 1180 inotify_state_t *state;
1181 1181 minor_t minor = getminor(dev);
1182 1182
1183 1183 state = ddi_get_soft_state(inotify_softstate, minor);
1184 1184
1185 1185 mutex_enter(&state->ins_lock);
1186 1186
1187 1187 if (state->ins_head != NULL) {
1188 1188 *reventsp = events & (POLLRDNORM | POLLIN);
1189 1189 } else {
1190 1190 *reventsp = 0;
1191 1191
1192 1192 if (!anyyet)
1193 1193 *phpp = &state->ins_pollhd;
1194 1194 }
1195 1195
1196 1196 mutex_exit(&state->ins_lock);
1197 1197
1198 1198 return (0);
1199 1199 }
1200 1200
1201 1201 /*ARGSUSED*/
1202 1202 static int
1203 1203 inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
1204 1204 {
1205 1205 inotify_state_t *state;
1206 1206 minor_t minor = getminor(dev);
1207 1207 file_t *fp;
1208 1208 int rval;
1209 1209
1210 1210 state = ddi_get_soft_state(inotify_softstate, minor);
1211 1211
1212 1212 switch (cmd) {
1213 1213 case INOTIFYIOC_ADD_WATCH: {
1214 1214 inotify_addwatch_t addwatch;
1215 1215 file_t *fp;
1216 1216
1217 1217 if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
1218 1218 return (EFAULT);
1219 1219
1220 1220 if ((fp = getf(addwatch.inaw_fd)) == NULL)
1221 1221 return (EBADF);
1222 1222
1223 1223 rval = inotify_add_watch(state, fp->f_vnode,
1224 1224 addwatch.inaw_mask, rv);
1225 1225
1226 1226 releasef(addwatch.inaw_fd);
1227 1227 return (rval);
1228 1228 }
1229 1229
1230 1230 case INOTIFYIOC_ADD_CHILD: {
1231 1231 inotify_addchild_t addchild;
1232 1232 char name[MAXPATHLEN];
1233 1233
1234 1234 if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
1235 1235 return (EFAULT);
1236 1236
1237 1237 if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
1238 1238 return (EFAULT);
1239 1239
1240 1240 if ((fp = getf(addchild.inac_fd)) == NULL)
1241 1241 return (EBADF);
1242 1242
1243 1243 rval = inotify_add_child(state, fp->f_vnode, name);
1244 1244
1245 1245 releasef(addchild.inac_fd);
1246 1246 return (rval);
1247 1247 }
1248 1248
1249 1249 case INOTIFYIOC_RM_WATCH:
1250 1250 return (inotify_rm_watch(state, arg));
1251 1251
1252 1252 case INOTIFYIOC_ACTIVATE:
1253 1253 return (inotify_activate(state, arg));
1254 1254
1255 1255 case FIONREAD: {
1256 1256 int32_t size;
1257 1257
1258 1258 mutex_enter(&state->ins_lock);
1259 1259 size = state->ins_size;
1260 1260 mutex_exit(&state->ins_lock);
1261 1261
1262 1262 if (copyout(&size, (void *)arg, sizeof (size)) != 0)
1263 1263 return (EFAULT);
1264 1264
1265 1265 return (0);
1266 1266 }
1267 1267
1268 1268 default:
1269 1269 break;
1270 1270 }
1271 1271
1272 1272 return (ENOTTY);
1273 1273 }
1274 1274
/*
 * close(9E) entry point: tear down the instance.  Wakes and clears any
 * pollers, removes all watches, frees the event queue, synchronously drains
 * the zombie list, stops the cleaner cyclic, and finally unlinks the state
 * from the global list and frees the minor.
 */
/*ARGSUSED*/
static int
inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	inotify_state_t *state, **sp;
	inotify_watch_t *watch, *zombies;
	inotify_kevent_t *event;
	minor_t minor = getminor(dev);

	state = ddi_get_soft_state(inotify_softstate, minor);

	/* Notify any pollers that the descriptor is going away. */
	if (state->ins_pollhd.ph_list != NULL) {
		pollwakeup(&state->ins_pollhd, POLLERR);
		pollhead_clean(&state->ins_pollhd);
	}

	mutex_enter(&state->ins_lock);

	/*
	 * First, destroy all of our watches.
	 */
	while ((watch = avl_first(&state->ins_bywd)) != NULL)
		inotify_watch_remove(state, watch);

	/*
	 * And now destroy our event queue.
	 */
	while ((event = state->ins_head) != NULL) {
		state->ins_head = event->ine_next;
		kmem_free(event, INOTIFY_EVENT_LENGTH(event));
	}

	/*
	 * Detach the zombie list so it can be drained without holding
	 * ins_lock (cv_wait below must not be done under the state lock).
	 */
	zombies = state->ins_zombies;
	state->ins_zombies = NULL;
	mutex_exit(&state->ins_lock);

	/*
	 * Now that our state lock is dropped, we can synchronously wait on
	 * any zombies.  The zombie list is singly linked through inw_parent
	 * (see inotify_clean(), which walks it the same way).
	 */
	while ((watch = zombies) != NULL) {
		zombies = zombies->inw_parent;

		mutex_enter(&watch->inw_lock);

		/* Wait until only our reference remains. */
		while (watch->inw_refcnt > 1)
			cv_wait(&watch->inw_cv, &watch->inw_lock);

		/*
		 * NOTE(review): inotify_watch_destroy() presumably disposes
		 * of inw_lock (no mutex_exit here) -- confirm against its
		 * definition.
		 */
		inotify_watch_destroy(watch);
	}

	if (state->ins_cleaner != NULL) {
		ddi_periodic_delete(state->ins_cleaner);
		state->ins_cleaner = NULL;
	}

	mutex_enter(&inotify_lock);

	/*
	 * Remove our state from our global list, and release our hold on
	 * the cred.
	 */
	for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
		VERIFY(*sp != NULL);

	*sp = (*sp)->ins_next;
	crfree(state->ins_cred);
	vmem_destroy(state->ins_wds);

	ddi_soft_state_free(inotify_softstate, minor);
	vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);

	mutex_exit(&inotify_lock);

	return (0);
}
1351 1351
/*
 * attach(9E) entry point: initialize driver-global state -- the soft-state
 * facility, the /dev/inotify minor node, the FEM template, and the minor
 * number arena.  Each failure path unwinds exactly what was set up before
 * it.  NOTE(review): cmd is not checked for DDI_ATTACH here -- presumably
 * intentional for this pseudo-driver, but worth confirming.
 */
/*ARGSUSED*/
static int
inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	mutex_enter(&inotify_lock);

	if (ddi_soft_state_init(&inotify_softstate,
	    sizeof (inotify_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
	    INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
		ddi_soft_state_fini(&inotify_softstate);
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	if (fem_create("inotify_fem",
	    inotify_vnodesrc_template, &inotify_femp) != 0) {
		cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&inotify_softstate);
		mutex_exit(&inotify_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	inotify_devi = devi;

	/*
	 * Clone minors are identifiers allocated above INOTIFYMNRN_CLONE.
	 */
	inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
	    UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);

	mutex_exit(&inotify_lock);

	return (DDI_SUCCESS);
}
1393 1393
1394 1394 /*ARGSUSED*/
1395 1395 static int
1396 1396 inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1397 1397 {
1398 1398 switch (cmd) {
1399 1399 case DDI_DETACH:
1400 1400 break;
1401 1401
1402 1402 case DDI_SUSPEND:
1403 1403 return (DDI_SUCCESS);
1404 1404
1405 1405 default:
1406 1406 return (DDI_FAILURE);
1407 1407 }
1408 1408
1409 1409 mutex_enter(&inotify_lock);
1410 1410 fem_free(inotify_femp);
1411 1411 vmem_destroy(inotify_minor);
1412 1412
1413 1413 ddi_remove_minor_node(inotify_devi, NULL);
1414 1414 inotify_devi = NULL;
1415 1415
1416 1416 ddi_soft_state_fini(&inotify_softstate);
1417 1417 mutex_exit(&inotify_lock);
1418 1418
1419 1419 return (DDI_SUCCESS);
1420 1420 }
1421 1421
1422 1422 /*ARGSUSED*/
1423 1423 static int
1424 1424 inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1425 1425 {
1426 1426 int error;
1427 1427
1428 1428 switch (infocmd) {
1429 1429 case DDI_INFO_DEVT2DEVINFO:
1430 1430 *result = (void *)inotify_devi;
1431 1431 error = DDI_SUCCESS;
1432 1432 break;
1433 1433 case DDI_INFO_DEVT2INSTANCE:
1434 1434 *result = (void *)0;
1435 1435 error = DDI_SUCCESS;
1436 1436 break;
1437 1437 default:
1438 1438 error = DDI_FAILURE;
1439 1439 }
1440 1440 return (error);
1441 1441 }
1442 1442
/*
 * Character device entry points for /dev/inotify.
 */
static struct cb_ops inotify_cb_ops = {
	inotify_open,		/* open */
	inotify_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	inotify_read,		/* read */
	nodev,			/* write */
	inotify_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	inotify_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

/*
 * Autoconfiguration entry points.
 */
static struct dev_ops inotify_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	inotify_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	inotify_attach,		/* attach */
	inotify_detach,		/* detach */
	nodev,			/* reset */
	&inotify_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"inotify support",	/* name of module */
	&inotify_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
1487 1487
/*
 * Loadable module entry point: install the inotify pseudo-driver.
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
1493 1493
/*
 * Loadable module entry point: report module information.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
1499 1499
/*
 * Loadable module entry point: remove the inotify pseudo-driver.
 */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
|
↓ open down ↓ |
986 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX