--- old/usr/src/uts/common/io/inotify.c
+++ new/usr/src/uts/common/io/inotify.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 14 * Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
15 15 */
16 16
17 17 /*
18 18 * Support for the inotify facility, a Linux-borne facility for asynchronous
19 19 * notification of certain events on specified files or directories. Our
20 20 * implementation broadly leverages the file event monitoring facility, and
21 21 * would actually be quite straightforward were it not for a very serious
22 22 * blunder in the inotify interface: in addition to allowing for one to be
23 23 * notified on events on a particular file or directory, inotify also allows
24 24 * for one to be notified on certain events on files _within_ a watched
25 25 * directory -- even though those events have absolutely nothing to do with
26 26 * the directory itself. This leads to all sorts of madness because file
27 27 * operations are (of course) not undertaken on paths but rather on open
28 28 * files -- and the relationships between open files and the paths that resolve
29 29 * to those files are neither static nor isomorphic. We implement this
30 30 * concept by having _child watches_ when directories are watched with events
31 31 * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
32 32 * first added, and we modify those child watches dynamically as files are
33 33 * created, deleted, moved into or moved out of the specified directory. This
34 34 * mechanism works well, absent hard links. Hard links, unfortunately, break
35 35 * this rather badly, and the user is warned that watches on directories that
36 36 * have multiple directory entries referring to the same file may behave
37 37 * unexpectedly.
38 38 */
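
A minimal user-level sketch of the semantics described above, written against
the Linux inotify API from inotify(7) that this facility emulates; the path by
which these calls reach /dev/inotify (lx brand emulation or a native library
wrapper) is assumed here and not shown.  Watching a directory with child
events in the mask delivers events for files _within_ the directory on the
directory's watch descriptor, with the entry name carried in the event:

	#include <sys/inotify.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		union {
			struct inotify_event ev;	/* forces alignment */
			char buf[4096];
		} u;
		int fd = inotify_init();
		int wd = inotify_add_watch(fd, "/tmp",
		    IN_CREATE | IN_DELETE | IN_MOVED_FROM | IN_MOVED_TO);

		if (fd == -1 || wd == -1) {
			perror("inotify");
			return (1);
		}

		/* Blocks until, e.g., an entry is created in /tmp. */
		if (read(fd, u.buf, sizeof (u.buf)) > 0) {
			(void) printf("wd=%d mask=0x%x name=%s\n", u.ev.wd,
			    u.ev.mask, u.ev.len != 0 ? u.ev.name : "(self)");
		}

		return (0);
	}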
39 39
40 40 #include <sys/ddi.h>
41 41 #include <sys/sunddi.h>
42 42 #include <sys/inotify.h>
43 43 #include <sys/fem.h>
44 44 #include <sys/conf.h>
45 45 #include <sys/stat.h>
46 46 #include <sys/vfs_opreg.h>
47 47 #include <sys/vmem.h>
48 48 #include <sys/avl.h>
49 49 #include <sys/sysmacros.h>
50 50 #include <sys/cyclic.h>
51 51 #include <sys/filio.h>
52 52
53 53 struct inotify_state;
54 54 struct inotify_kevent;
55 55
56 56 typedef struct inotify_watch inotify_watch_t;
57 57 typedef struct inotify_state inotify_state_t;
58 58 typedef struct inotify_kevent inotify_kevent_t;
59 59
60 60 struct inotify_watch {
61 61 kmutex_t inw_lock; /* lock protecting ref count */
62 62 int inw_refcnt; /* reference count */
63 63 uint8_t inw_zombie:1; /* boolean: is zombie */
64 64 uint8_t inw_fired:1; /* boolean: fired one-shot */
65 65 uint8_t inw_active:1; /* boolean: watch is active */
66 66 uint8_t inw_orphaned:1; /* boolean: orphaned */
67 67 kcondvar_t inw_cv; /* condvar for zombifier */
68 68 uint32_t inw_mask; /* mask of watch */
69 69 int32_t inw_wd; /* watch descriptor */
70 70 vnode_t *inw_vp; /* underlying vnode */
71 71 inotify_watch_t *inw_parent; /* parent, if a child */
72 72 avl_node_t inw_byvp; /* watches by vnode */
73 73 avl_node_t inw_bywd; /* watches by descriptor */
74 74 avl_tree_t inw_children; /* children, if a parent */
75 75 char *inw_name; /* name, if a child */
76 76 list_node_t inw_orphan; /* orphan list */
77 77 cred_t *inw_cred; /* cred, if orphaned */
78 78 inotify_state_t *inw_state; /* corresponding state */
79 79 };
80 80
81 81 struct inotify_kevent {
82 82 inotify_kevent_t *ine_next; /* next event in queue */
83 83 struct inotify_event ine_event; /* event (variable size) */
84 84 };
85 85
86 86 #define INOTIFY_EVENT_LENGTH(ev) \
87 87 (sizeof (inotify_kevent_t) + (ev)->ine_event.len)
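
The queue entry wraps the variable-size struct inotify_event with a link
pointer, and INOTIFY_EVENT_LENGTH() is the amount charged per entry when
allocating and freeing (see inotify_watch_event() and inotify_read() below).
The name, when present, is NUL-terminated and its len is rounded up to
sizeof (struct inotify_event) -- 16 bytes, four 32-bit fields ahead of the
flexible name array -- so a name such as "a.out" (6 bytes including its NUL)
is carried with len = 16.  User space never sees the ine_next link: a read()
returns back-to-back ine_event records, and a consumer steps through them by
the fixed header size plus len.  A sketch of that walk, assuming only
<sys/inotify.h> and a hypothetical callback:

	#include <sys/inotify.h>
	#include <sys/types.h>

	/*
	 * Hypothetical helper: invoke cb on each event in a buffer that
	 * read(2) filled with nbytes of inotify event data.
	 */
	static void
	for_each_event(char *buf, ssize_t nbytes,
	    void (*cb)(struct inotify_event *))
	{
		char *p = buf;

		while (p < buf + nbytes) {
			struct inotify_event *ev =
			    (struct inotify_event *)(void *)p;

			cb(ev);
			p += sizeof (struct inotify_event) + ev->len;
		}
	}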
88 88
89 89 struct inotify_state {
90 90 kmutex_t ins_lock; /* lock protecting state */
91 91 avl_tree_t ins_byvp; /* watches by vnode */
92 92 avl_tree_t ins_bywd; /* watches by descriptor */
93 93 vmem_t *ins_wds; /* watch identifier arena */
94 94 int ins_maxwatches; /* maximum number of watches */
95 95 int ins_maxevents; /* maximum number of events */
96 96 int ins_nevents; /* current # of events */
97 97 int32_t ins_size; /* total size of events */
98 98 inotify_kevent_t *ins_head; /* head of event queue */
99 99 inotify_kevent_t *ins_tail; /* tail of event queue */
100 100 pollhead_t ins_pollhd; /* poll head */
101 101 kcondvar_t ins_cv; /* condvar for reading */
102 102 list_t ins_orphans; /* orphan list */
103 103 ddi_periodic_t ins_cleaner; /* cyclic for cleaning */
104 104 inotify_watch_t *ins_zombies; /* zombie watch list */
105 105 cred_t *ins_cred; /* creator's credentials */
106 106 inotify_state_t *ins_next; /* next state on global list */
107 107 };
108 108
109 109 /*
110 110 * Tunables (exported read-only in lx-branded zones via /proc).
111 111 */
112 112 int inotify_maxwatches = 8192; /* max watches per instance */
113 113 int inotify_maxevents = 16384; /* max events */
114 114 int inotify_maxinstances = 128; /* max instances per user */
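
These are ordinary kernel variables, so they can presumably be tuned at boot
time via /etc/system in the usual set module:variable form; a hypothetical
fragment (module name "inotify" and values assumed for illustration only):

	* Raise the per-instance inotify limits (illustrative values).
	set inotify:inotify_maxwatches = 16384
	set inotify:inotify_maxevents = 32768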
115 115
116 116 /*
117 117 * Internal global variables.
118 118 */
119 119 static kmutex_t inotify_lock; /* lock protecting state */
120 120 static dev_info_t *inotify_devi; /* device info */
121 121 static fem_t *inotify_femp; /* FEM pointer */
122 122 static vmem_t *inotify_minor; /* minor number arena */
123 123 static void *inotify_softstate; /* softstate pointer */
124 124 static inotify_state_t *inotify_state; /* global list of state */
125 125
126 126 static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
127 127 static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
128 128 static void inotify_watch_delete(inotify_watch_t *, uint32_t);
129 129 static void inotify_watch_remove(inotify_state_t *state,
130 130 inotify_watch_t *watch);
131 131
132 132 static int
133 133 inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
134 134 cred_t *cr, caller_context_t *ct)
135 135 {
136 136 inotify_watch_t *watch = vf->fa_fnode->fn_available;
137 137 int rval;
138 138
139 139 if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
140 140 inotify_watch_event(watch, flag & FWRITE ?
141 141 IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
142 142 }
143 143
144 144 return (rval);
145 145 }
146 146
147 147 static int
148 148 inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
149 149 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
150 150 vsecattr_t *vsecp)
151 151 {
152 152 inotify_watch_t *watch = vf->fa_fnode->fn_available;
153 153 int rval;
154 154
155 155 if ((rval = vnext_create(vf, name, vap, excl, mode,
156 156 vpp, cr, flag, ct, vsecp)) == 0) {
157 157 inotify_watch_insert(watch, *vpp, name);
158 158 inotify_watch_event(watch, IN_CREATE, name);
159 159 }
160 160
161 161 return (rval);
162 162 }
163 163
164 164 static int
165 165 inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
166 166 caller_context_t *ct, int flags)
167 167 {
168 168 inotify_watch_t *watch = vf->fa_fnode->fn_available;
169 169 int rval;
170 170
171 171 if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
172 172 inotify_watch_insert(watch, svp, tnm);
173 173 inotify_watch_event(watch, IN_CREATE, tnm);
174 174 }
175 175
176 176 return (rval);
177 177 }
178 178
179 179 static int
180 180 inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
181 181 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
182 182 {
183 183 inotify_watch_t *watch = vf->fa_fnode->fn_available;
184 184 int rval;
185 185
186 186 if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
187 187 ct, flags, vsecp)) == 0) {
188 188 inotify_watch_insert(watch, *vpp, name);
189 189 inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
190 190 }
191 191
192 192 return (rval);
193 193 }
194 194
195 195 static int
196 196 inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
197 197 {
198 198 inotify_watch_t *watch = vf->fa_fnode->fn_available;
199 199 int rval;
200 200
201 201 if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
202 202 inotify_watch_event(watch, IN_OPEN, NULL);
203 203
204 204 return (rval);
205 205 }
206 206
207 207 static int
208 208 inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
209 209 caller_context_t *ct)
210 210 {
211 211 inotify_watch_t *watch = vf->fa_fnode->fn_available;
212 212 int rval = vnext_read(vf, uiop, ioflag, cr, ct);
213 213 inotify_watch_event(watch, IN_ACCESS, NULL);
214 214
215 215 return (rval);
216 216 }
217 217
218 218 static int
219 219 inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
220 220 caller_context_t *ct, int flags)
221 221 {
222 222 inotify_watch_t *watch = vf->fa_fnode->fn_available;
223 223 int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
224 224 inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
225 225
226 226 return (rval);
227 227 }
228 228
229 229 int
230 230 inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
231 231 int flags)
232 232 {
233 233 inotify_watch_t *watch = vf->fa_fnode->fn_available;
234 234 int rval;
235 235
236 236 if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
237 237 inotify_watch_event(watch, IN_DELETE, nm);
238 238
239 239 return (rval);
240 240 }
241 241
242 242 int
243 243 inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
244 244 caller_context_t *ct, int flags)
245 245 {
246 246 inotify_watch_t *watch = vf->fa_fnode->fn_available;
247 247 int rval;
248 248
249 249 if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
250 250 inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
251 251
252 252 return (rval);
253 253 }
254 254
255 255 static int
256 256 inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
257 257 caller_context_t *ct)
258 258 {
259 259 inotify_watch_t *watch = vf->fa_fnode->fn_available;
260 260 int rval;
261 261
262 262 if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
263 263 inotify_watch_event(watch, IN_ATTRIB, NULL);
264 264
265 265 return (rval);
266 266 }
267 267
268 268 static int
269 269 inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
270 270 caller_context_t *ct)
271 271 {
272 272 inotify_watch_t *watch = vf->fa_fnode->fn_available;
273 273 int rval = vnext_write(vf, uiop, ioflag, cr, ct);
274 274 inotify_watch_event(watch, IN_MODIFY, NULL);
275 275
276 276 return (rval);
277 277 }
278 278
279 279 static int
280 280 inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
281 281 caller_context_t *ct)
282 282 {
283 283 inotify_watch_t *watch = vf->fa_fnode->fn_available;
284 284
285 285 switch (vnevent) {
286 286 case VE_RENAME_SRC:
287 287 inotify_watch_event(watch, IN_MOVE_SELF, NULL);
288 288 inotify_watch_delete(watch, IN_MOVE_SELF);
289 289 break;
290 290 case VE_REMOVE:
291 291 /*
292 292 * Linux will apparently fire an IN_ATTRIB event when the link
293 293 * count changes (including when it drops to 0 on a remove).
294 294 * This is merely somewhat odd; what is amazing is that this
295 295 * IN_ATTRIB event is not visible on an inotify watch on the
296 296 * parent directory. (IN_ATTRIB events are normally sent to
297 297 * watches on the parent directory). While it's hard to
298 298 * believe that this constitutes desired semantics, ltp
299 299 * unfortunately tests this case (if implicitly); in the name
300 300 * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
301 301 * explicitly watching the file that has been removed.
302 302 */
303 303 if (watch->inw_parent == NULL)
304 304 inotify_watch_event(watch, IN_ATTRIB, NULL);
305 305
306 306 /*FALLTHROUGH*/
307 307 case VE_RENAME_DEST:
308 308 inotify_watch_event(watch, IN_DELETE_SELF, NULL);
309 309 inotify_watch_delete(watch, IN_DELETE_SELF);
310 310 break;
311 311 case VE_RMDIR:
312 312 /*
313 313 * It seems that IN_ISDIR should really be OR'd in here, but
314 314 * Linux doesn't seem to do that in this case; for the sake of
315 315 * bug-for-bug compatibility, we don't do it either.
316 316 */
317 317 inotify_watch_event(watch, IN_DELETE_SELF, NULL);
318 318 inotify_watch_delete(watch, IN_DELETE_SELF);
319 319 break;
320 320 case VE_CREATE:
321 321 case VE_TRUNCATE:
322 322 case VE_RESIZE:
323 323 inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
324 324 break;
325 325 case VE_LINK:
326 326 inotify_watch_event(watch, IN_ATTRIB, NULL);
327 327 break;
328 328 case VE_RENAME_SRC_DIR:
329 329 inotify_watch_event(watch, IN_MOVED_FROM, name);
330 330 break;
331 331 case VE_RENAME_DEST_DIR:
332 332 if (name == NULL)
333 333 name = dvp->v_path;
334 334
335 335 inotify_watch_insert(watch, dvp, name);
336 336 inotify_watch_event(watch, IN_MOVED_TO, name);
337 337 break;
338 338 case VE_SUPPORT:
339 339 case VE_MOUNTEDOVER:
340 340 case VE_PRE_RENAME_SRC:
341 341 case VE_PRE_RENAME_DEST:
342 342 case VE_PRE_RENAME_DEST_DIR:
343 343 break;
344 344 }
345 345
346 346 return (vnext_vnevent(vf, vnevent, dvp, name, ct));
347 347 }
348 348
349 349 const fs_operation_def_t inotify_vnodesrc_template[] = {
350 350 VOPNAME_CLOSE, { .femop_close = inotify_fop_close },
351 351 VOPNAME_CREATE, { .femop_create = inotify_fop_create },
352 352 VOPNAME_LINK, { .femop_link = inotify_fop_link },
353 353 VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir },
354 354 VOPNAME_OPEN, { .femop_open = inotify_fop_open },
355 355 VOPNAME_READ, { .femop_read = inotify_fop_read },
356 356 VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir },
357 357 VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove },
358 358 VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir },
359 359 VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr },
360 360 VOPNAME_WRITE, { .femop_write = inotify_fop_write },
361 361 VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent },
362 362 NULL, NULL
363 363 };
364 364
365 365 static int
366 366 inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
367 367 {
368 368 if (lhs->inw_wd < rhs->inw_wd)
369 369 return (-1);
370 370
371 371 if (lhs->inw_wd > rhs->inw_wd)
372 372 return (1);
373 373
374 374 return (0);
375 375 }
376 376
377 377 static int
378 378 inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
379 379 {
380 380 uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
381 381
382 382 if (lvp < rvp)
383 383 return (-1);
384 384
385 385 if (lvp > rvp)
386 386 return (1);
387 387
388 388 return (0);
389 389 }
390 390
391 391 static void
392 392 inotify_watch_hold(inotify_watch_t *watch)
393 393 {
394 394 mutex_enter(&watch->inw_lock);
395 395 VERIFY(watch->inw_refcnt > 0);
396 396 watch->inw_refcnt++;
397 397 mutex_exit(&watch->inw_lock);
398 398 }
399 399
400 400 static void
401 401 inotify_watch_release(inotify_watch_t *watch)
402 402 {
403 403 mutex_enter(&watch->inw_lock);
404 404 VERIFY(watch->inw_refcnt > 1);
405 405
406 406 if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
407 407 /*
408 408 * We're down to our last reference; kick anyone that might be
409 409 * waiting.
410 410 */
411 411 cv_signal(&watch->inw_cv);
412 412 }
413 413
414 414 mutex_exit(&watch->inw_lock);
415 415 }
416 416
417 417 static void
418 418 inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
419 419 {
420 420 inotify_kevent_t *event, *tail;
421 421 inotify_state_t *state = watch->inw_state;
422 422 uint32_t wd = watch->inw_wd, cookie = 0, len;
423 423 boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
424 424 inotify_watch_t *source = watch;
425 425
426 426 if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
427 427 return;
428 428
429 429 if (watch->inw_parent != NULL) {
430 430 /*
431 431 * This is an event on the child; if this isn't a valid child
432 432 * event, return. Otherwise, we move our watch to be our
433 433 * parent (which we know is around because we have a hold on
434 434 * it) and continue.
435 435 */
436 436 if (!(mask & IN_CHILD_EVENTS))
437 437 return;
438 438
439 439 name = watch->inw_name;
440 440 watch = watch->inw_parent;
441 441 wd = watch->inw_wd;
442 442 }
443 443
444 444 if (!removal) {
445 445 mutex_enter(&state->ins_lock);
446 446
447 447 if (watch->inw_zombie ||
448 448 watch->inw_fired || !watch->inw_active) {
449 449 mutex_exit(&state->ins_lock);
450 450 return;
451 451 }
452 452 } else {
453 453 if (!watch->inw_active)
454 454 return;
455 455
456 456 VERIFY(MUTEX_HELD(&state->ins_lock));
457 457 }
458 458
459 459 /*
460 460 * If this is an operation on a directory and it's a child event
461 461 * (even if it's not on a child), we specify IN_ISDIR.
462 462 */
463 463 if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
464 464 mask |= IN_ISDIR;
465 465
466 466 if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
467 467 cookie = (uint32_t)curthread->t_did;
468 468
469 469 if (state->ins_nevents >= state->ins_maxevents) {
470 470 /*
471 471 * We're at our maximum number of events -- turn our event
472 472 * into an IN_Q_OVERFLOW event, which will be coalesced if
473 473 * it's already the tail event.
474 474 */
475 475 mask = IN_Q_OVERFLOW;
476 476 wd = (uint32_t)-1;
477 477 cookie = 0;
478 478 len = 0;
479 479 }
480 480
481 481 if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
482 482 tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
483 483 ((tail->ine_event.len == 0 && (name == NULL || mask == IN_Q_OVERFLOW)) ||
484 484 (name != NULL && tail->ine_event.len != 0 &&
485 485 strcmp(tail->ine_event.name, name) == 0))) {
486 486 /*
487 487 * This is an implicitly coalesced event; we're done.
488 488 */
489 489 if (!removal)
490 490 mutex_exit(&state->ins_lock);
491 491 return;
492 492 }
493 493
494 494 if (name != NULL) {
495 495 len = strlen(name) + 1;
496 496 len = roundup(len, sizeof (struct inotify_event));
497 497 } else {
498 498 len = 0;
499 499 }
500 500
501 501 event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
502 502 event->ine_event.wd = wd;
503 503 event->ine_event.mask = (uint32_t)mask;
504 504 event->ine_event.cookie = cookie;
505 505 event->ine_event.len = len;
506 506
507 507 if (name != NULL)
508 - strcpy(event->ine_event.name, name);
508 + (void) strcpy(event->ine_event.name, name);
509 509
510 510 if (tail != NULL) {
511 511 tail->ine_next = event;
512 512 } else {
513 513 VERIFY(state->ins_head == NULL);
514 514 state->ins_head = event;
515 515 cv_broadcast(&state->ins_cv);
516 516 }
517 517
518 518 state->ins_tail = event;
519 519 state->ins_nevents++;
520 520 state->ins_size += sizeof (event->ine_event) + len;
521 521
522 522 if (removal)
523 523 return;
524 524
525 525 if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
526 526 /*
527 527 * If this is a one-shot, we need to remove the watch. (Note
528 528 * that this will recurse back into inotify_watch_event() to
529 529 * fire the IN_IGNORED event -- but with "removal" set.)
530 530 */
531 531 watch->inw_fired = 1;
532 532 inotify_watch_remove(state, watch);
533 533 }
534 534
535 535 mutex_exit(&state->ins_lock);
536 536 pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
537 537 }
538 538
539 539 /*
540 540 * Destroy a watch. By the time we're in here, the watch must have exactly
541 541 * one reference.
542 542 */
543 543 static void
544 544 inotify_watch_destroy(inotify_watch_t *watch)
545 545 {
546 546 VERIFY(MUTEX_HELD(&watch->inw_lock));
547 547
548 548 if (watch->inw_name != NULL)
549 549 kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);
550 550
551 551 kmem_free(watch, sizeof (inotify_watch_t));
552 552 }
553 553
554 554 /*
555 555 * Zombify a watch. By the time we come in here, it must be true that the
556 556 * watch has already been fem_uninstall()'d -- the only reference should be
557 557 * in the state's data structure. If we can get away with freeing it, we'll
558 558 * do that -- but if the reference count is greater than one due to an active
559 559 * vnode operation, we'll put this watch on the zombie list on the state
560 560 * structure.
561 561 */
562 562 static void
563 563 inotify_watch_zombify(inotify_watch_t *watch)
564 564 {
565 565 inotify_state_t *state = watch->inw_state;
566 566
567 567 VERIFY(MUTEX_HELD(&state->ins_lock));
568 568 VERIFY(!watch->inw_zombie);
569 569
570 570 watch->inw_zombie = 1;
571 571
572 572 if (watch->inw_parent != NULL) {
573 573 inotify_watch_release(watch->inw_parent);
574 574 } else {
575 575 avl_remove(&state->ins_byvp, watch);
576 576 avl_remove(&state->ins_bywd, watch);
577 577 vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
578 578 watch->inw_wd = -1;
579 579 }
580 580
581 581 mutex_enter(&watch->inw_lock);
582 582
583 583 if (watch->inw_refcnt == 1) {
584 584 /*
585 585 * There are no operations in flight and there is no way
586 586 * for anyone to discover this watch -- we can destroy it.
587 587 */
588 588 inotify_watch_destroy(watch);
589 589 } else {
590 590 /*
591 591 * There are operations in flight; we will need to enqueue
592 592 * this for later destruction.
593 593 */
594 594 watch->inw_parent = state->ins_zombies;
595 595 state->ins_zombies = watch;
596 596 mutex_exit(&watch->inw_lock);
597 597 }
598 598 }
599 599
600 600 static inotify_watch_t *
601 601 inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
602 602 const char *name, vnode_t *vp, uint32_t mask)
603 603 {
604 604 inotify_watch_t *watch;
605 605 int err;
606 606
607 607 VERIFY(MUTEX_HELD(&state->ins_lock));
608 608
609 609 watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
610 610
611 611 watch->inw_vp = vp;
612 612 watch->inw_mask = mask;
613 613 watch->inw_state = state;
614 614 watch->inw_refcnt = 1;
615 615
616 616 if (parent == NULL) {
617 617 watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
618 618 1, VM_BESTFIT | VM_SLEEP);
619 619 avl_add(&state->ins_byvp, watch);
620 620 avl_add(&state->ins_bywd, watch);
621 621
622 622 avl_create(&watch->inw_children,
623 623 (int(*)(const void *, const void *))inotify_watch_cmpvp,
624 624 sizeof (inotify_watch_t),
625 625 offsetof(inotify_watch_t, inw_byvp));
626 626 } else {
627 627 VERIFY(name != NULL);
628 628 inotify_watch_hold(parent);
629 629 watch->inw_mask &= IN_CHILD_EVENTS;
630 630 watch->inw_parent = parent;
631 631 watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
632 - strcpy(watch->inw_name, name);
632 + /* strcpy() is safe, because strlen(name) bounds us. */
633 + (void) strcpy(watch->inw_name, name);
633 634
634 635 avl_add(&parent->inw_children, watch);
635 636 }
636 637
637 638 /*
638 639 * Add our monitor to the vnode. We must not have the watch lock held
639 640 * when we do this, as it will immediately hold our watch.
640 641 */
641 642 err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
642 643 (void (*)(void *))inotify_watch_hold,
643 644 (void (*)(void *))inotify_watch_release);
644 645
645 646 VERIFY(err == 0);
646 647
647 648 return (watch);
648 649 }
649 650
650 651 /*
651 652 * Remove a (non-child) watch. This is called from either synchronous context
652 653 * via inotify_rm_watch() or monitor context via either a vnevent or a
653 654 * one-shot.
654 655 */
655 656 static void
656 657 inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
657 658 {
658 659 inotify_watch_t *child;
659 660 int err;
660 661
661 662 VERIFY(MUTEX_HELD(&state->ins_lock));
662 663 VERIFY(watch->inw_parent == NULL);
663 664
664 665 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
665 666 VERIFY(err == 0);
666 667
667 668 /*
668 669 * If we have children, we're going to remove them all and set them
669 670 * all to be zombies.
670 671 */
671 672 while ((child = avl_first(&watch->inw_children)) != NULL) {
672 673 VERIFY(child->inw_parent == watch);
673 674 avl_remove(&watch->inw_children, child);
674 675
675 676 err = fem_uninstall(child->inw_vp, inotify_femp, child);
676 677 VERIFY(err == 0);
677 678
678 679 /*
679 680 * If this child watch has been orphaned, remove it from the
680 681 * state's list of orphans.
681 682 */
682 683 if (child->inw_orphaned) {
683 684 list_remove(&state->ins_orphans, child);
684 685 crfree(child->inw_cred);
685 686 }
686 687
687 688 VN_RELE(child->inw_vp);
688 689
689 690 /*
690 691 * We're down (or should be down) to a single reference to
691 692 * this child watch; it's safe to zombify it.
692 693 */
693 694 inotify_watch_zombify(child);
694 695 }
695 696
696 697 inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
697 698 VN_RELE(watch->inw_vp);
698 699
699 700 /*
700 701 * It's now safe to zombify the watch -- we know that the only reference
701 702 * can come from operations in flight.
702 703 */
703 704 inotify_watch_zombify(watch);
704 705 }
705 706
706 707 /*
707 708 * Delete a watch. Should only be called from VOP context.
708 709 */
709 710 static void
710 711 inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
711 712 {
712 713 inotify_state_t *state = watch->inw_state;
713 714 inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
714 715 int err;
715 716
716 717 if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
717 718 return;
718 719
719 720 mutex_enter(&state->ins_lock);
720 721
721 722 if (watch->inw_zombie) {
722 723 mutex_exit(&state->ins_lock);
723 724 return;
724 725 }
725 726
726 727 if ((parent = watch->inw_parent) == NULL) {
727 728 if (event == IN_DELETE_SELF) {
728 729 /*
729 730 * If we're here because we're being deleted and we
730 731 * are not a child watch, we need to delete the entire
731 732 * watch, children and all.
732 733 */
733 734 inotify_watch_remove(state, watch);
734 735 }
735 736
736 737 mutex_exit(&state->ins_lock);
737 738 return;
738 739 } else {
739 740 if (event == IN_DELETE_SELF &&
740 741 !(parent->inw_mask & IN_EXCL_UNLINK)) {
741 742 /*
742 743 * This is a child watch for a file that is being
743 744 * removed and IN_EXCL_UNLINK has not been specified;
744 745 * indicate that it is orphaned and add it to the list
745 746 * of orphans. (This list will be checked by the
746 747 * cleaning cyclic to determine when the watch has
747 748 * become the only hold on the vnode, at which point
748 749 * the watch can be zombified.) Note that we check
749 750 * if the watch is orphaned before we orphan it: hard
750 751 * links make it possible for VE_REMOVE to be called
751 752 * multiple times on the same vnode. (!)
752 753 */
753 754 if (!watch->inw_orphaned) {
754 755 watch->inw_orphaned = 1;
755 756 watch->inw_cred = CRED();
756 757 crhold(watch->inw_cred);
757 758 list_insert_head(&state->ins_orphans, watch);
758 759 }
759 760
760 761 mutex_exit(&state->ins_lock);
761 762 return;
762 763 }
763 764
764 765 if (watch->inw_orphaned) {
765 766 /*
766 767 * If we're here, a file was orphaned and then later
767 768 * moved -- which almost certainly means that hard
768 769 * links are on the scene. We choose the orphan over
769 770 * the move because we don't want to spuriously
770 771 * drop events if we can avoid it.
771 772 */
772 773 crfree(watch->inw_cred);
773 774 list_remove(&state->ins_orphans, watch);
774 775 }
775 776 }
776 777
777 778 if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
778 779 /*
779 780 * This watch has already been deleted from the parent.
780 781 */
781 782 mutex_exit(&state->ins_lock);
782 783 return;
783 784 }
784 785
785 786 avl_remove(&parent->inw_children, watch);
786 787 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
787 788 VERIFY(err == 0);
788 789
789 790 VN_RELE(watch->inw_vp);
790 791
791 792 /*
792 793 * It's now safe to zombify the watch -- which won't actually delete
793 794 * it as we know that the reference count is greater than 1.
794 795 */
795 796 inotify_watch_zombify(watch);
796 797 mutex_exit(&state->ins_lock);
797 798 }
798 799
799 800 /*
800 801 * Insert a new child watch. Should only be called from VOP context when
801 802 * a child is created in a watched directory.
802 803 */
803 804 static void
804 805 inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
805 806 {
806 807 inotify_state_t *state = watch->inw_state;
807 808 inotify_watch_t cmp = { .inw_vp = vp };
808 809
809 810 if (!(watch->inw_mask & IN_CHILD_EVENTS))
810 811 return;
811 812
812 813 mutex_enter(&state->ins_lock);
813 814
814 815 if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
815 816 mutex_exit(&state->ins_lock);
816 817 return;
817 818 }
818 819
819 820 if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
820 821 mutex_exit(&state->ins_lock);
821 822 return;
822 823 }
823 824
824 825 VN_HOLD(vp);
825 826 watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
826 827 VERIFY(watch != NULL);
827 828
828 829 mutex_exit(&state->ins_lock);
829 830 }
830 831
831 832
832 833 static int
833 834 inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
834 835 int32_t *wdp)
835 836 {
836 837 inotify_watch_t *watch, cmp = { .inw_vp = vp };
837 838 uint32_t set;
838 839
839 840 set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;
840 841
841 842 /*
842 843 * Lookup our vnode to determine if we already have a watch on it.
843 844 */
844 845 mutex_enter(&state->ins_lock);
845 846
846 847 if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
847 848 /*
848 849 * We don't have this watch; allocate a new one, provided that
849 850 * we have fewer than our limit.
850 851 */
851 852 if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
852 853 mutex_exit(&state->ins_lock);
853 854 return (ENOSPC);
854 855 }
855 856
856 857 VN_HOLD(vp);
857 858 watch = inotify_watch_add(state, NULL, NULL, vp, set);
858 859 *wdp = watch->inw_wd;
859 860 mutex_exit(&state->ins_lock);
860 861
861 862 return (0);
862 863 }
863 864
864 865 VERIFY(!watch->inw_zombie);
865 866
866 867 if (!(mask & IN_MASK_ADD)) {
867 868 /*
868 869 * Note that if we're resetting our event mask and we're
869 870 * transitioning from an event mask that includes child events
870 871 * to one that doesn't, there will be potentially some stale
871 872 * child watches. This is basically fine: they won't fire,
872 873 * and they will correctly be removed when the watch is
873 874 * removed.
874 875 */
875 876 watch->inw_mask = 0;
876 877 }
877 878
878 879 watch->inw_mask |= set;
879 880
880 881 *wdp = watch->inw_wd;
881 882
882 883 mutex_exit(&state->ins_lock);
883 884
884 885 return (0);
885 886 }
886 887
887 888 static int
888 889 inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
889 890 {
890 891 inotify_watch_t *watch, cmp = { .inw_vp = vp };
891 892 vnode_t *cvp;
892 893 int err;
893 894
894 895 /*
895 896 * Verify that the specified child doesn't have a directory component
896 897 * within it.
897 898 */
898 899 if (strchr(name, '/') != NULL)
899 900 return (EINVAL);
900 901
901 902 /*
902 903 * Lookup the underlying file. Note that this will succeed even if
903 904 * we don't have permissions to actually read the file.
904 905 */
905 906 if ((err = lookupnameat(name,
906 907 UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
907 908 return (err);
908 909 }
909 910
910 911 /*
911 912 * Use our vnode to find our watch, and then add our child watch to it.
912 913 */
913 914 mutex_enter(&state->ins_lock);
914 915
915 916 if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
916 917 /*
917 918 * This is unexpected -- it means that we don't have the
918 919 * watch that we thought we had.
919 920 */
920 921 mutex_exit(&state->ins_lock);
921 922 VN_RELE(cvp);
922 923 return (ENXIO);
923 924 }
924 925
925 926 /*
926 927 * Now lookup the child vnode in the watch; we'll only add it if it
927 928 * isn't already there.
928 929 */
929 930 cmp.inw_vp = cvp;
930 931
931 932 if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
932 933 mutex_exit(&state->ins_lock);
933 934 VN_RELE(cvp);
934 935 return (0);
935 936 }
936 937
937 938 watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
938 939 VERIFY(watch != NULL);
939 940 mutex_exit(&state->ins_lock);
940 941
941 942 return (0);
942 943 }
943 944
944 945 static int
945 946 inotify_rm_watch(inotify_state_t *state, int32_t wd)
946 947 {
947 948 inotify_watch_t *watch, cmp = { .inw_wd = wd };
948 949
949 950 mutex_enter(&state->ins_lock);
950 951
951 952 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
952 953 mutex_exit(&state->ins_lock);
953 954 return (EINVAL);
954 955 }
955 956
956 957 inotify_watch_remove(state, watch);
957 958 mutex_exit(&state->ins_lock);
958 959
959 960 return (0);
960 961 }
961 962
962 963 static int
963 964 inotify_activate(inotify_state_t *state, int32_t wd)
964 965 {
965 966 inotify_watch_t *watch, cmp = { .inw_wd = wd };
966 967
967 968 mutex_enter(&state->ins_lock);
968 969
969 970 if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
970 971 mutex_exit(&state->ins_lock);
971 972 return (EINVAL);
972 973 }
973 974
974 975 watch->inw_active = 1;
975 976
976 977 mutex_exit(&state->ins_lock);
977 978
978 979 return (0);
979 980 }
980 981
981 982 /*
982 983 * Called periodically as a cyclic to process the orphans and zombies.
983 984 */
984 985 static void
985 986 inotify_clean(void *arg)
986 987 {
987 988 inotify_state_t *state = arg;
988 989 inotify_watch_t *watch, *parent, *next, **prev;
989 990 cred_t *savecred;
990 991 int err;
991 992
992 993 mutex_enter(&state->ins_lock);
993 994
994 995 for (watch = list_head(&state->ins_orphans);
995 996 watch != NULL; watch = next) {
996 997 next = list_next(&state->ins_orphans, watch);
997 998
998 999 VERIFY(!watch->inw_zombie);
999 1000 VERIFY((parent = watch->inw_parent) != NULL);
1000 1001
1001 1002 if (watch->inw_vp->v_count > 1)
1002 1003 continue;
1003 1004
1004 1005 avl_remove(&parent->inw_children, watch);
1005 1006 err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
1006 1007 VERIFY(err == 0);
1007 1008
1008 1009 list_remove(&state->ins_orphans, watch);
1009 1010
1010 1011 /*
1011 1012 * For purposes of releasing the vnode, we need to switch our
1012 1013 * cred to be the cred of the orphaning thread (which we held
1013 1014 * at the time this watch was orphaned).
1014 1015 */
1015 1016 savecred = curthread->t_cred;
1016 1017 curthread->t_cred = watch->inw_cred;
1017 1018 VN_RELE(watch->inw_vp);
1018 1019 crfree(watch->inw_cred);
1019 1020 curthread->t_cred = savecred;
1020 1021
1021 1022 inotify_watch_zombify(watch);
1022 1023 }
1023 1024
1024 1025 prev = &state->ins_zombies;
1025 1026
1026 1027 while ((watch = *prev) != NULL) {
1027 1028 mutex_enter(&watch->inw_lock);
1028 1029
1029 1030 if (watch->inw_refcnt == 1) {
1030 1031 *prev = watch->inw_parent;
1031 1032 inotify_watch_destroy(watch);
1032 1033 continue;
1033 1034 }
1034 1035
1035 1036 prev = &watch->inw_parent;
1036 1037 mutex_exit(&watch->inw_lock);
1037 1038 }
1038 1039
1039 1040 mutex_exit(&state->ins_lock);
1040 1041 }
1041 1042
1042 1043 /*ARGSUSED*/
1043 1044 static int
1044 1045 inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
1045 1046 {
1046 1047 inotify_state_t *state;
1047 1048 major_t major = getemajor(*devp);
1048 1049 minor_t minor = getminor(*devp);
1049 1050 int instances = 0;
1050 1051 char c[64];
1051 1052
1052 1053 if (minor != INOTIFYMNRN_INOTIFY)
1053 1054 return (ENXIO);
1054 1055
1055 1056 mutex_enter(&inotify_lock);
1056 1057
1057 1058 for (state = inotify_state; state != NULL; state = state->ins_next) {
1058 1059 if (state->ins_cred == cred_p)
1059 1060 instances++;
1060 1061 }
1061 1062
1062 1063 if (instances >= inotify_maxinstances) {
1063 1064 mutex_exit(&inotify_lock);
1064 1065 return (EMFILE);
1065 1066 }
1066 1067
1067 1068 minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
1068 1069 VM_BESTFIT | VM_SLEEP);
1069 1070
1070 1071 if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
1071 1072 vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1072 1073 mutex_exit(&inotify_lock);
1073 1074 return (ENOMEM);
1074 1075 }
1075 1076
1076 1077 state = ddi_get_soft_state(inotify_softstate, minor);
1077 1078 *devp = makedevice(major, minor);
1078 1079
1079 1080 crhold(cred_p);
1080 1081 state->ins_cred = cred_p;
1081 1082 state->ins_next = inotify_state;
1082 1083 inotify_state = state;
1083 1084
1084 1085 (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
1085 1086 state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
1086 1087 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
1087 1088
1088 1089 avl_create(&state->ins_bywd,
1089 1090 (int(*)(const void *, const void *))inotify_watch_cmpwd,
1090 1091 sizeof (inotify_watch_t),
1091 1092 offsetof(inotify_watch_t, inw_bywd));
1092 1093
1093 1094 avl_create(&state->ins_byvp,
1094 1095 (int(*)(const void *, const void *))inotify_watch_cmpvp,
1095 1096 sizeof (inotify_watch_t),
1096 1097 offsetof(inotify_watch_t, inw_byvp));
1097 1098
1098 1099 list_create(&state->ins_orphans, sizeof (inotify_watch_t),
1099 1100 offsetof(inotify_watch_t, inw_orphan));
1100 1101
1101 1102 state->ins_maxwatches = inotify_maxwatches;
1102 1103 state->ins_maxevents = inotify_maxevents;
1103 1104
1104 1105 mutex_exit(&inotify_lock);
1105 1106
1106 1107 state->ins_cleaner = ddi_periodic_add(inotify_clean,
1107 1108 state, NANOSEC, DDI_IPL_0);
1108 1109
1109 1110 return (0);
1110 1111 }
1111 1112
1112 1113 /*ARGSUSED*/
1113 1114 static int
1114 1115 inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
1115 1116 {
1116 1117 inotify_state_t *state;
1117 1118 inotify_kevent_t *event;
1118 1119 minor_t minor = getminor(dev);
1119 1120 int err = 0, nevents = 0;
1120 1121 size_t len;
1121 1122
1122 1123 state = ddi_get_soft_state(inotify_softstate, minor);
1123 1124
1124 1125 mutex_enter(&state->ins_lock);
1125 1126
1126 1127 while (state->ins_head == NULL) {
1127 1128 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
1128 1129 mutex_exit(&state->ins_lock);
1129 1130 return (EAGAIN);
1130 1131 }
1131 1132
1132 1133 if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
1133 1134 mutex_exit(&state->ins_lock);
1134 1135 return (EINTR);
1135 1136 }
1136 1137 }
1137 1138
1138 1139 /*
1139 1140 * We have events and we have our lock; return as many as we can.
1140 1141 */
1141 1142 while ((event = state->ins_head) != NULL) {
1142 1143 len = sizeof (event->ine_event) + event->ine_event.len;
1143 1144
1144 1145 if (uio->uio_resid < len) {
1145 1146 if (nevents == 0)
1146 1147 err = EINVAL;
1147 1148 break;
1148 1149 }
1149 1150
1150 1151 nevents++;
1151 1152
1152 1153 if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
1153 1154 break;
1154 1155
1155 1156 VERIFY(state->ins_nevents > 0);
1156 1157 state->ins_nevents--;
1157 1158
1158 1159 VERIFY(state->ins_size > 0);
1159 1160 state->ins_size -= len;
1160 1161
1161 1162 if ((state->ins_head = event->ine_next) == NULL) {
1162 1163 VERIFY(event == state->ins_tail);
1163 1164 VERIFY(state->ins_nevents == 0);
1164 1165 state->ins_tail = NULL;
1165 1166 }
1166 1167
1167 1168 kmem_free(event, INOTIFY_EVENT_LENGTH(event));
1168 1169 }
1169 1170
1170 1171 mutex_exit(&state->ins_lock);
1171 1172
1172 1173 return (err);
1173 1174 }
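
Two consequences of this loop for consumers: with FNDELAY/FNONBLOCK set, an
empty queue returns EAGAIN rather than blocking, and a buffer too small to
hold even the first event fails with EINVAL rather than returning a truncated
event.  A reader should therefore size its buffer for at least one maximal
event; a minimal fragment, assuming <sys/inotify.h>, <limits.h>, and
<errno.h>, with fd an open non-blocking inotify descriptor and names bounded
by NAME_MAX:

	/* Large enough for one event with a maximal name. */
	char buf[sizeof (struct inotify_event) + NAME_MAX + 1];
	ssize_t n = read(fd, buf, sizeof (buf));

	if (n == -1 && errno == EAGAIN) {
		/* no events pending; try again after poll(2) */
	}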
1174 1175
1175 1176 /*ARGSUSED*/
1176 1177 static int
1177 1178 inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
1178 1179 struct pollhead **phpp)
1179 1180 {
1180 1181 inotify_state_t *state;
1181 1182 minor_t minor = getminor(dev);
1182 1183
1183 1184 state = ddi_get_soft_state(inotify_softstate, minor);
1184 1185
1185 1186 mutex_enter(&state->ins_lock);
1186 1187
1187 1188 if (state->ins_head != NULL) {
1188 1189 *reventsp = events & (POLLRDNORM | POLLIN);
1189 1190 } else {
1190 1191 *reventsp = 0;
1191 1192
1192 1193 if (!anyyet)
1193 1194 *phpp = &state->ins_pollhd;
1194 1195 }
1195 1196
1196 1197 mutex_exit(&state->ins_lock);
1197 1198
1198 1199 return (0);
1199 1200 }
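
The pollwakeup() in inotify_watch_event() posts POLLIN | POLLRDNORM whenever
the queue goes non-empty, so the descriptor composes with ordinary event
loops.  A minimal sketch, assuming fd is an open inotify descriptor:

	#include <poll.h>

	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		/* at least one event is queued; read(2) will not block */
	}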
1200 1201
1201 1202 /*ARGSUSED*/
1202 1203 static int
1203 1204 inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
1204 1205 {
1205 1206 inotify_state_t *state;
1206 1207 minor_t minor = getminor(dev);
1207 1208 file_t *fp;
1208 1209 int rval;
1209 1210
1210 1211 state = ddi_get_soft_state(inotify_softstate, minor);
1211 1212
1212 1213 switch (cmd) {
1213 1214 case INOTIFYIOC_ADD_WATCH: {
1214 1215 inotify_addwatch_t addwatch;
1215 1216 file_t *fp;
1216 1217
1217 1218 if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
1218 1219 return (EFAULT);
1219 1220
1220 1221 if ((fp = getf(addwatch.inaw_fd)) == NULL)
1221 1222 return (EBADF);
1222 1223
1223 1224 rval = inotify_add_watch(state, fp->f_vnode,
1224 1225 addwatch.inaw_mask, rv);
1225 1226
1226 1227 releasef(addwatch.inaw_fd);
1227 1228 return (rval);
1228 1229 }
1229 1230
1230 1231 case INOTIFYIOC_ADD_CHILD: {
1231 1232 inotify_addchild_t addchild;
1232 1233 char name[MAXPATHLEN];
1233 1234
1234 1235 if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
1235 1236 return (EFAULT);
1236 1237
1237 1238 if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
1238 1239 return (EFAULT);
1239 1240
1240 1241 if ((fp = getf(addchild.inac_fd)) == NULL)
1241 1242 return (EBADF);
1242 1243
1243 1244 rval = inotify_add_child(state, fp->f_vnode, name);
1244 1245
1245 1246 releasef(addchild.inac_fd);
1246 1247 return (rval);
1247 1248 }
1248 1249
1249 1250 case INOTIFYIOC_RM_WATCH:
1250 1251 return (inotify_rm_watch(state, arg));
1251 1252
1252 1253 case INOTIFYIOC_ACTIVATE:
1253 1254 return (inotify_activate(state, arg));
1254 1255
1255 1256 case FIONREAD: {
1256 1257 int32_t size;
1257 1258
1258 1259 mutex_enter(&state->ins_lock);
1259 1260 size = state->ins_size;
1260 1261 mutex_exit(&state->ins_lock);
1261 1262
1262 1263 if (copyout(&size, (void *)arg, sizeof (size)) != 0)
1263 1264 return (EFAULT);
1264 1265
1265 1266 return (0);
1266 1267 }
1267 1268
1268 1269 default:
1269 1270 break;
1270 1271 }
1271 1272
1272 1273 return (ENOTTY);
1273 1274 }
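
Of note here is FIONREAD, which reports ins_size -- the byte total of every
queued event -- so a consumer can size a single read() to drain the queue.
A minimal sketch, assuming fd is an open inotify descriptor:

	#include <sys/ioctl.h>
	#include <sys/filio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int avail;

	if (ioctl(fd, FIONREAD, &avail) == 0 && avail > 0) {
		char *buf = malloc(avail);

		if (buf != NULL) {
			(void) read(fd, buf, avail);	/* drains the queue */
			free(buf);
		}
	}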
1274 1275
1275 1276 /*ARGSUSED*/
1276 1277 static int
1277 1278 inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
1278 1279 {
1279 1280 inotify_state_t *state, **sp;
1280 1281 inotify_watch_t *watch, *zombies;
1281 1282 inotify_kevent_t *event;
1282 1283 minor_t minor = getminor(dev);
1283 1284
1284 1285 state = ddi_get_soft_state(inotify_softstate, minor);
1285 1286
1286 1287 if (state->ins_pollhd.ph_list != NULL) {
1287 1288 pollwakeup(&state->ins_pollhd, POLLERR);
1288 1289 pollhead_clean(&state->ins_pollhd);
1289 1290 }
1290 1291
1291 1292 mutex_enter(&state->ins_lock);
1292 1293
1293 1294 /*
1294 1295 * First, destroy all of our watches.
1295 1296 */
1296 1297 while ((watch = avl_first(&state->ins_bywd)) != NULL)
1297 1298 inotify_watch_remove(state, watch);
1298 1299
1299 1300 /*
1300 1301 * And now destroy our event queue.
1301 1302 */
1302 1303 while ((event = state->ins_head) != NULL) {
1303 1304 state->ins_head = event->ine_next;
1304 1305 kmem_free(event, INOTIFY_EVENT_LENGTH(event));
1305 1306 }
1306 1307
1307 1308 zombies = state->ins_zombies;
1308 1309 state->ins_zombies = NULL;
1309 1310 mutex_exit(&state->ins_lock);
1310 1311
1311 1312 /*
1312 1313 * Now that our state lock is dropped, we can synchronously wait on
1313 1314 * any zombies.
1314 1315 */
1315 1316 while ((watch = zombies) != NULL) {
1316 1317 zombies = zombies->inw_parent;
1317 1318
1318 1319 mutex_enter(&watch->inw_lock);
1319 1320
1320 1321 while (watch->inw_refcnt > 1)
1321 1322 cv_wait(&watch->inw_cv, &watch->inw_lock);
1322 1323
1323 1324 inotify_watch_destroy(watch);
1324 1325 }
1325 1326
1326 1327 if (state->ins_cleaner != NULL) {
1327 1328 ddi_periodic_delete(state->ins_cleaner);
1328 1329 state->ins_cleaner = NULL;
1329 1330 }
1330 1331
1331 1332 mutex_enter(&inotify_lock);
1332 1333
1333 1334 /*
1334 1335 * Remove our state from our global list, and release our hold on
1335 1336 * the cred.
1336 1337 */
1337 1338 for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
1338 1339 VERIFY(*sp != NULL);
1339 1340
1340 1341 *sp = (*sp)->ins_next;
1341 1342 crfree(state->ins_cred);
1342 1343 vmem_destroy(state->ins_wds);
1343 1344
1344 1345 ddi_soft_state_free(inotify_softstate, minor);
1345 1346 vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
1346 1347
1347 1348 mutex_exit(&inotify_lock);
1348 1349
1349 1350 return (0);
1350 1351 }
1351 1352
1352 1353 /*ARGSUSED*/
1353 1354 static int
1354 1355 inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1355 1356 {
1356 1357 mutex_enter(&inotify_lock);
1357 1358
1358 1359 if (ddi_soft_state_init(&inotify_softstate,
1359 1360 sizeof (inotify_state_t), 0) != 0) {
1360 1361 cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
1361 1362 mutex_exit(&inotify_lock);
1362 1363 return (DDI_FAILURE);
1363 1364 }
1364 1365
1365 1366 if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
1366 1367 INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
1367 1368 cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
1368 1369 ddi_soft_state_fini(&inotify_softstate);
1369 1370 mutex_exit(&inotify_lock);
1370 1371 return (DDI_FAILURE);
1371 1372 }
1372 1373
1373 1374 if (fem_create("inotify_fem",
1374 1375 inotify_vnodesrc_template, &inotify_femp) != 0) {
1375 1376 cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
1376 1377 ddi_remove_minor_node(devi, NULL);
1377 1378 ddi_soft_state_fini(&inotify_softstate);
1378 1379 mutex_exit(&inotify_lock);
1379 1380 return (DDI_FAILURE);
1380 1381 }
1381 1382
1382 1383 ddi_report_dev(devi);
1383 1384 inotify_devi = devi;
1384 1385
1385 1386 inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
1386 1387 UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
1387 1388 VM_SLEEP | VMC_IDENTIFIER);
1388 1389
1389 1390 mutex_exit(&inotify_lock);
1390 1391
1391 1392 return (DDI_SUCCESS);
1392 1393 }
1393 1394
1394 1395 /*ARGSUSED*/
1395 1396 static int
1396 1397 inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1397 1398 {
1398 1399 switch (cmd) {
1399 1400 case DDI_DETACH:
1400 1401 break;
1401 1402
1402 1403 case DDI_SUSPEND:
1403 1404 return (DDI_SUCCESS);
1404 1405
1405 1406 default:
1406 1407 return (DDI_FAILURE);
1407 1408 }
1408 1409
1409 1410 mutex_enter(&inotify_lock);
1410 1411 fem_free(inotify_femp);
1411 1412 vmem_destroy(inotify_minor);
1412 1413
1413 1414 ddi_remove_minor_node(inotify_devi, NULL);
1414 1415 inotify_devi = NULL;
1415 1416
1416 1417 ddi_soft_state_fini(&inotify_softstate);
1417 1418 mutex_exit(&inotify_lock);
1418 1419
1419 1420 return (DDI_SUCCESS);
1420 1421 }
1421 1422
1422 1423 /*ARGSUSED*/
1423 1424 static int
1424 1425 inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1425 1426 {
1426 1427 int error;
1427 1428
1428 1429 switch (infocmd) {
1429 1430 case DDI_INFO_DEVT2DEVINFO:
1430 1431 *result = (void *)inotify_devi;
1431 1432 error = DDI_SUCCESS;
1432 1433 break;
1433 1434 case DDI_INFO_DEVT2INSTANCE:
1434 1435 *result = (void *)0;
1435 1436 error = DDI_SUCCESS;
1436 1437 break;
1437 1438 default:
1438 1439 error = DDI_FAILURE;
1439 1440 }
1440 1441 return (error);
1441 1442 }
1442 1443
1443 1444 static struct cb_ops inotify_cb_ops = {
1444 1445 inotify_open, /* open */
1445 1446 inotify_close, /* close */
1446 1447 nulldev, /* strategy */
1447 1448 nulldev, /* print */
1448 1449 nodev, /* dump */
1449 1450 inotify_read, /* read */
1450 1451 nodev, /* write */
1451 1452 inotify_ioctl, /* ioctl */
1452 1453 nodev, /* devmap */
1453 1454 nodev, /* mmap */
1454 1455 nodev, /* segmap */
1455 1456 inotify_poll, /* poll */
1456 1457 ddi_prop_op, /* cb_prop_op */
1457 1458 0, /* streamtab */
1458 1459 D_NEW | D_MP /* Driver compatibility flag */
1459 1460 };
1460 1461
1461 1462 static struct dev_ops inotify_ops = {
1462 1463 DEVO_REV, /* devo_rev */
1463 1464 0, /* refcnt */
1464 1465 inotify_info, /* get_dev_info */
1465 1466 nulldev, /* identify */
1466 1467 nulldev, /* probe */
1467 1468 inotify_attach, /* attach */
1468 1469 inotify_detach, /* detach */
1469 1470 nodev, /* reset */
1470 1471 &inotify_cb_ops, /* driver operations */
1471 1472 NULL, /* bus operations */
1472 1473 nodev, /* dev power */
1473 1474 ddi_quiesce_not_needed, /* quiesce */
1474 1475 };
1475 1476
1476 1477 static struct modldrv modldrv = {
1477 1478 &mod_driverops, /* module type (this is a pseudo driver) */
1478 1479 "inotify support", /* name of module */
1479 1480 &inotify_ops, /* driver ops */
1480 1481 };
1481 1482
1482 1483 static struct modlinkage modlinkage = {
1483 1484 MODREV_1,
1484 1485 (void *)&modldrv,
1485 1486 NULL
1486 1487 };
1487 1488
1488 1489 int
1489 1490 _init(void)
1490 1491 {
1491 1492 return (mod_install(&modlinkage));
1492 1493 }
1493 1494
1494 1495 int
1495 1496 _info(struct modinfo *modinfop)
1496 1497 {
1497 1498 return (mod_info(&modlinkage, modinfop));
1498 1499 }
1499 1500
1500 1501 int
1501 1502 _fini(void)
1502 1503 {
1503 1504 return (mod_remove(&modlinkage));
1504 1505 }