1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39 #ifndef _SYS_FS_UFS_INODE_H
40 #define _SYS_FS_UFS_INODE_H
41
42 #include <sys/isa_defs.h>
43 #include <sys/fbuf.h>
44 #include <sys/fdbuffer.h>
45 #include <sys/fcntl.h>
46 #include <sys/uio.h>
47 #include <sys/t_lock.h>
48 #include <sys/thread.h>
49 #include <sys/cred.h>
50 #include <sys/time.h>
51 #include <sys/types32.h>
52 #include <sys/fs/ufs_fs.h>
53 #include <sys/fs/ufs_lockfs.h>
54 #include <sys/fs/ufs_trans.h>
55 #include <sys/kstat.h>
56 #include <sys/fs/ufs_acl.h>
57 #include <sys/fs/ufs_panic.h>
58 #include <sys/dnlc.h>
59
60 #ifdef _KERNEL
61 #include <sys/vfs_opreg.h>
62 #endif
63
64 #ifdef __cplusplus
65 extern "C" {
66 #endif
67
68 /*
69 * The I node is the focus of all local file activity in UNIX.
70 * There is a unique inode allocated for each active file,
71 * each current directory, each mounted-on file, each mapping,
72 * and the root. An inode is `named' by its dev/inumber pair.
73 * Data in icommon is read in from permanent inode on volume.
74 *
75 * Each inode has 5 locks associated with it:
76 * i_rwlock: Serializes ufs_write and ufs_setattr request
77 * and allows ufs_read requests to proceed in parallel.
78 * Serializes reads/updates to directories.
79 * vfs_dqrwlock: Manages quota sub-system quiescence. See below.
80 * i_contents: Protects almost all of the fields in the inode
81 * except for those listed below. When held
82 * in writer mode also protects those fields
83 * listed under i_tlock.
84 * i_tlock: When i_tlock is held with the i_contents reader
85 * lock the i_atime, i_mtime, i_ctime,
86 * i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag
87 * i_seq, i_writer & i_mapcnt fields are protected.
88 * For more i_flag locking info see below.
89 * ih_lock: Protects inode hash chain buckets
90 * ifree_lock: Protects inode freelist
91 *
92 * Lock ordering:
93 * i_rwlock > i_contents > i_tlock
94 * i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock
95 * i_contents > i_tlock
96 * vfs_dqrwlock > i_contents(writer) > i_tlock
97 * ih_lock > i_contents > i_tlock
98 *
99 * Making major changes to quota sub-system state, while the file
100 * system is mounted required the addition of another lock. The
101 * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs
102 * structure. This lock is used to manage quota sub-system quiescence
103 * for a particular file system. Major changes to quota sub-system
104 * state (disabling quotas, enabling quotas, and setting new quota
105 * limits) all require the file system to be quiescent and grabbing
106 * vfs_dqrwlock as writer accomplishes this. On the other hand,
107 * grabbing vfs_dqrwlock as reader makes the quota sub-system
108 * non-quiescent and lets the quota sub-system know that now is not a
109 * good time to change major quota sub-system state. Typically
110 * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for
111 * writing. However, there are cases where vfs_dqrwlock is grabbed for
112 * reading without a corresponding i_contents write grab because there
113 * is no relevant inode. There are also cases where i_contents is
114 * grabbed for writing when a vfs_dqrwlock read grab is not needed
115 * because the inode changes do not affect quotas.
116 *
117 * Unfortunately, performance considerations have required that we be more
118 * intelligent about using i_tlock when updating i_flag. Ideally, we would
119 * have simply separated out several of the bits in i_flag into their own
120 * ints to avoid problems. But, instead, we have implemented the following
121 * rules:
122 *
123 * o You can update any i_flag field while holding the writer-contents,
124 * or by holding the reader-contents AND holding i_tlock.
125 * You can only call ITIMES_NOLOCK while holding the writer-contents,
126 * or by holding the reader-contents AND holding i_tlock.
127 *
128 * o For a directory, holding the reader-rw_lock is sufficient for setting
129 * IACC.
130 *
131 * o Races with IREF are avoided by holding the reader contents lock
132 * and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage.
133 * And by holding the writer-contents in ufs_iinactive.
134 *
135 * o The callers are no longer required to handle the calls to ITIMES
136 * and ITIMES_NOLOCK. The functions that set the i_flag bits are
137 * responsible for managing those calls. The exceptions are the
138 * bmap routines.
139 *
140 * SVR4 Extended Fundamental Type (EFT) support:
141 * The inode structure has been enhanced to support
142 * 32-bit user-id, 32-bit group-id, and 32-bit device number.
143 * Standard SVR4 ufs also supports 32-bit mode field. For the reason
144 * of backward compatibility with the previous ufs disk format,
145 * 32-bit mode field is not supported.
146 *
147 * The current inode structure is 100% backward compatible with
148 * the previous inode structure if no user-id or group-id exceeds
149 * USHRT_MAX, and no major or minor number of a device number
150 * stored in an inode exceeds 255.
151 *
152 * Rules for managing i_seq:
153 * o i_seq is locked under the same rules as i_flag
154 * o The i_ctime or i_mtime MUST never change without increasing
155 * the value of i_seq.
156 * o You may increase the value of i_seq without the timestamps
 *     changing, this may decrease the caller's performance but will
158 * be functionally correct.
159 * o The common case is when IUPD or ICHG is set, increase i_seq
160 * and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
161 * o A less common case is the setting of IUPD or ICHG and while still
162 * holding the correct lock defer the timestamp and i_seq update
163 * until later, but it must still be done before the lock is released.
164 * bmap_write is an example of this, where the caller does the update.
165 * o If multiple changes are being made with the timestamps being
166 * updated only at the end, a single increase of i_seq is allowed.
167 * o If changes are made with IUPD or ICHG being set, but
168 * the controlling lock is being dropped before the timestamp is
169 * updated, there is a risk that another thread will also change
170 * the file, update i_flag, and push just one timestamp update.
171 * There is also the risk that another thread calls ITIMES or
172 * ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq,
173 * this will cause ufs_imark to change the timestamps without changing
174 * i_seq. If the controlling lock is dropped, ISEQ must be set to
175 * force i_seq to be increased on next ufs_imark, but i_seq MUST still
176 * be increased by the original setting thread before its deferred
 *     call to ITIMES to ensure it is increased the correct number of times.
178 */
179
/*
 * Flag values indicating that the uid (resp. gid) is 32 bits long.
 * Both are the all-ones 16-bit value 65535.
 *
 * The casts are wrapped in parentheses so the macros behave as a single
 * primary expression in every context (e.g. as the operand of sizeof).
 * GID_LONG is cast through o_gid_t, the type of the ic_sgid field, rather
 * than o_uid_t.
 */
#define	UID_LONG	((o_uid_t)65535)
				/* flag value to indicate uid is 32-bit long */
#define	GID_LONG	((o_gid_t)65535)
				/* flag value to indicate gid is 32-bit long */
184
#define	NDADDR	12		/* direct addresses in inode */
#define	NIADDR	3		/* indirect addresses in inode */
/*
 * FSL_SIZE is the byte size of (NDADDR + NIADDR - 1) block-address slots;
 * with a 4-byte daddr32_t that is 56, the max fast symbolic name length.
 * The whole replacement list is parenthesized so that the macro can be
 * used safely inside larger expressions (division, modulo, etc.).
 */
#define	FSL_SIZE	((NDADDR + NIADDR - 1) * sizeof (daddr32_t))
				/* max fast symbolic name length is 56 */
189
/*
 * Member-path shorthands for use on a struct inode:
 *   i_fs  - reached through the inode's ufsvfs and its vfs_bufp buffer
 *           (b_un.b_fs).
 *   i_vfs - the v_vfsp of the vnode associated with the inode.
 * These expand to member names, so they are only meaningful after an
 * inode `.' or `->' operator.
 */
#define	i_fs	i_ufsvfs->vfs_bufp->b_un.b_fs
#define	i_vfs	i_vnode->v_vfsp
192
/*
 * Inode fields shared by the in-core inode (embedded as i_ic) and the
 * dinode (embedded as di_icom).  The number at the start of each field
 * comment is the field's byte offset; the offsets run up to 124, with a
 * final 4-byte field, so field order and sizes must not change.
 *
 * Kernel builds see the three timestamps as struct timeval32; other builds
 * see a time32_t followed by an int32_t spare in the same position.
 */
struct icommon {
	o_mode_t ic_smode;	/* 0: mode and type of file */
	short	ic_nlink;	/* 2: number of links to file */
	o_uid_t	ic_suid;	/* 4: owner's user id */
	o_gid_t	ic_sgid;	/* 6: owner's group id */
	u_offset_t ic_lsize;	/* 8: number of bytes in file */
#ifdef _KERNEL
	struct timeval32 ic_atime;	/* 16: time last accessed */
	struct timeval32 ic_mtime;	/* 24: time last modified */
	struct timeval32 ic_ctime;	/* 32: last time inode changed */
#else
	time32_t ic_atime;	/* 16: time last accessed */
	int32_t	ic_atspare;
	time32_t ic_mtime;	/* 24: time last modified */
	int32_t	ic_mtspare;
	time32_t ic_ctime;	/* 32: last time inode changed */
	int32_t	ic_ctspare;
#endif
	daddr32_t ic_db[NDADDR];	/* 40: disk block addresses */
	daddr32_t ic_ib[NIADDR];	/* 88: indirect blocks */
	int32_t	ic_flags;	/* 100: cflags */
	int32_t	ic_blocks;	/* 104: 512 byte blocks actually held */
	int32_t	ic_gen;		/* 108: generation number */
	int32_t	ic_shadow;	/* 112: shadow inode */
	uid_t	ic_uid;		/* 116: long EFT version of uid */
	gid_t	ic_gid;		/* 120: long EFT version of gid */
	uint32_t ic_oeftflag;	/* 124: extended attr directory ino, 0 = none */
};
221
/*
 * Large directories can be cached.  The caching state of a directory is
 * one of the values below; the three "disabled" states are <= 0 and the
 * single "enabled" state is positive.
 */
typedef enum {
	CD_DISABLED_NOMEM	= -2,
	CD_DISABLED_TOOBIG	= -1,
	CD_DISABLED		= 0,
	CD_ENABLED		= 1
} cachedir_t;
232
/*
 * In-core inode.
 *
 * Large Files: Note we use the inline functions load_double, store_double
 * to load and store the long long values of i_size. Therefore the
 * address of i_size must be eight byte aligned. Kmem_alloc of incore
 * inode structure makes sure that the structure is 8-byte aligned.
 * XX64 - reorder this structure?
 *
 * The leading members (i_chain, i_freef, i_freeb) are mirrored by
 * iqhead_t, whose own comments require the two layouts to match, so they
 * must stay first and in this order.
 */
typedef struct inode {
	struct inode *i_chain[2];	/* must be first */
	struct inode *i_freef;	/* free list forward - must be before i_ic */
	struct inode *i_freeb;	/* free list back - must be before i_ic */
	struct icommon i_ic;	/* Must be here */
	struct vnode *i_vnode;	/* vnode associated with this inode */
	struct vnode *i_devvp;	/* vnode for block I/O */
	dev_t	i_dev;		/* device where inode resides */
	ino_t	i_number;	/* i number, 1-to-1 with device address */
	off_t	i_diroff;	/* offset in dir, where we found last entry */
				/* just a hint - no locking needed */
	struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */
	struct dquot *i_dquot;	/* quota structure controlling this file */
	krwlock_t i_rwlock;	/* serializes write/setattr requests */
	krwlock_t i_contents;	/* protects (most of) inode contents */
	kmutex_t i_tlock;	/* protects time fields, i_flag */
	offset_t i_nextr;	/* next byte read offset (read-ahead) */
				/* No lock required */
	uint_t	i_flag;		/* inode flags */
	uint_t	i_seq;		/* modification sequence number */
	cachedir_t i_cachedir;	/* Cache this directory on next lookup */
				/* - no locking needed  */
	long	i_mapcnt;	/* mappings to file pages */
	int	*i_map;		/* block list for the corresponding file */
	dev_t	i_rdev;		/* INCORE rdev from i_oldrdev by ufs_iget */
	size_t	i_delaylen;	/* delayed writes, units=bytes */
	offset_t i_delayoff;	/* where we started delaying */
	offset_t i_nextrio;	/* where to start the next clust */
	long	i_writes;	/* number of outstanding bytes in write q */
	kcondvar_t i_wrcv;	/* sleep/wakeup for write throttle */
	offset_t i_doff;	/* dinode byte offset in file system */
	si_t	*i_ufs_acl;	/* pointer to acl entry */
	dcanchor_t i_danchor;	/* directory cache anchor */
	kthread_t *i_writer;	/* thread which is in window in wrip() */
} inode_t;
277
/*
 * A dinode is a struct icommon overlaid with a 128-byte character array;
 * the union therefore occupies at least 128 bytes regardless of the
 * size of struct icommon.
 */
struct dinode {
	union {
		struct	icommon di_icom;
		char	di_size[128];
	} di_un;
};
284
/*
 * Shorthand member names for struct inode.  Most map onto a field of the
 * embedded struct icommon (i_ic); i_forw/i_back name the two i_chain
 * slots.  Note that i_mode and i_smode both name ic_smode, whereas
 * i_uid/i_gid name the long (EFT) ids and i_suid/i_sgid the short ones.
 */
#define	i_mode		i_ic.ic_smode
#define	i_nlink		i_ic.ic_nlink
#define	i_uid		i_ic.ic_uid
#define	i_gid		i_ic.ic_gid
#define	i_smode		i_ic.ic_smode
#define	i_suid		i_ic.ic_suid
#define	i_sgid		i_ic.ic_sgid

#define	i_size		i_ic.ic_lsize
#define	i_db		i_ic.ic_db
#define	i_ib		i_ic.ic_ib

#define	i_atime		i_ic.ic_atime
#define	i_mtime		i_ic.ic_mtime
#define	i_ctime		i_ic.ic_ctime

#define	i_shadow	i_ic.ic_shadow
#define	i_oeftflag	i_ic.ic_oeftflag
#define	i_blocks	i_ic.ic_blocks
#define	i_cflags	i_ic.ic_flags
#ifdef _LITTLE_ENDIAN
/*
 * Originally done on x86, but carried on to all other little-endian
 * architectures, which provides for file system compatibility.
 */
#define	i_ordev		i_ic.ic_db[1]	/* USL SVR4 compatibility */
#else
#define	i_ordev		i_ic.ic_db[0]	/* was i_oldrdev */
#endif
#define	i_gen		i_ic.ic_gen
#define	i_forw		i_chain[0]
#define	i_back		i_chain[1]
317
/* EFT transition aids - obsolete */
#define	oEFT_MAGIC	0x90909090
#define	di_oeftflag	di_ic.ic_oeftflag

/*
 * Shorthand member names for struct dinode.  di_ic names the icommon arm
 * of the di_un union and every other di_* name is defined in terms of it
 * (macro expansion is lazy, so di_oeftflag above may precede di_ic).
 * Note that di_size here shadows the di_size array member name of the
 * dinode union once this macro is in effect.
 */
#define	di_ic		di_un.di_icom
#define	di_mode		di_ic.ic_smode
#define	di_nlink	di_ic.ic_nlink
#define	di_uid		di_ic.ic_uid
#define	di_gid		di_ic.ic_gid
#define	di_smode	di_ic.ic_smode
#define	di_suid		di_ic.ic_suid
#define	di_sgid		di_ic.ic_sgid

#define	di_size		di_ic.ic_lsize
#define	di_db		di_ic.ic_db
#define	di_ib		di_ic.ic_ib

#define	di_atime	di_ic.ic_atime
#define	di_mtime	di_ic.ic_mtime
#define	di_ctime	di_ic.ic_ctime
#define	di_cflags	di_ic.ic_flags

/* Same endian-dependent slot selection as for i_ordev. */
#ifdef _LITTLE_ENDIAN
#define	di_ordev	di_ic.ic_db[1]
#else
#define	di_ordev	di_ic.ic_db[0]
#endif
#define	di_shadow	di_ic.ic_shadow
#define	di_blocks	di_ic.ic_blocks
#define	di_gen		di_ic.ic_gen
348
/*
 * i_flag bits.  Each flag occupies a distinct bit; all values are written
 * at the same width so that the bit positions line up.
 */
#define	IUPD		0x00001		/* file has been modified */
#define	IACC		0x00002		/* inode access time to be updated */
#define	IMOD		0x00004		/* inode has been modified */
#define	ICHG		0x00008		/* inode has been changed */
#define	INOACC		0x00010		/* no access time update in getpage */
#define	IMODTIME	0x00020		/* mod time already set */
#define	IREF		0x00040		/* inode is being referenced */
#define	ISYNC		0x00080		/* do all allocation synchronously */
#define	IFASTSYMLNK	0x00100		/* fast symbolic link */
#define	IMODACC		0x00200		/* only access time changed; */
					/*   filesystem won't become active */
#define	IATTCHG		0x00400		/* only size/blocks have changed */
#define	IBDWRITE	0x00800		/* the inode has been scheduled for */
					/*   write operation asynchronously */
#define	ISTALE		0x01000		/* inode couldn't be read from disk */
#define	IDEL		0x02000		/* inode is being deleted */
#define	IDIRECTIO	0x04000		/* attempt directio */
#define	ISEQ		0x08000		/* deferred i_seq increase */
#define	IJUNKIQ		0x10000		/* on junk idle queue */
#define	IQUIET		0x20000		/* No file system full messages */
370
/* cflags (bits kept in ic_flags, reached as i_cflags / di_cflags) */
#define	IXATTR		0x0001		/* extended attribute */
#define	IFALLOCATE	0x0002		/* fallocate'd file */
#define	ICOMPRESS	0x0004		/* compressed for dcfs - see */
					/*   ufs_ioctl(), _FIO_COMPRESSED */
376
/*
 * modes (octal).  IFMT masks the file-type bits; the IF* values below it
 * are the individual file types found under that mask.
 */
#define	IFMT		0170000		/* type of file */
#define	IFIFO		0010000		/* named pipe (fifo) */
#define	IFCHR		0020000		/* character special */
#define	IFDIR		0040000		/* directory */
#define	IFBLK		0060000		/* block special */
#define	IFREG		0100000		/* regular */
#define	IFLNK		0120000		/* symbolic link */
#define	IFSHAD		0130000		/* shadow inode */
#define	IFSOCK		0140000		/* socket */
#define	IFATTRDIR	0160000		/* Attribute directory */

#define	ISUID		04000		/* set user id on execution */
#define	ISGID		02000		/* set group id on execution */
#define	ISVTX		01000		/* save swapped text even after use */
#define	IREAD		0400		/* read, write, execute permissions */
#define	IWRITE		0200
#define	IEXEC		0100
395
/*
 * How ufs_syncip() writes the inode information, listed in numeric order.
 */
#define	I_ASYNC		0	/* don't wait for the inode written */
#define	I_SYNC		1	/* wait for the inode written to disk */
#define	I_DSYNC		2	/* wait for the inode written to disk */
				/*   only if IATTCHG is set */
401
/*
 * Flag bits passed to ufs_itrunc(), indirtrunc(), and free().
 * One bit per flag, all written at the same width.
 */
#define	I_FREE		0x00000001	/* inode is being freed */
#define	I_DIR		0x00000002	/* inode is a directory */
#define	I_IBLK		0x00000004	/* indirect block */
#define	I_CHEAP		0x00000008	/* cheap free */
#define	I_SHAD		0x00000010	/* inode is a shadow inode */
#define	I_QUOTA		0x00000020	/* quota file */
#define	I_NOCANCEL	0x00000040	/* Don't cancel these fragments */
#define	I_ACCT		0x00000080	/* Update ufsvfs' unreclaimed_blocks */
411
/*
 * Directory "slot" state shared between ufs_dircheckforname() and the
 * routines that act on its result.
 *
 * When ufs_dircheckforname() does not find an entry with the given name,
 * the slot tells ufs_direnter_*() where there is space to put an entry
 * with that name.  When it does find the entry, the slot tells
 * ufs_dirrename() and ufs_dirremove() where the entry is.  If
 * ufs_dircheckforname() fails due to an error, the structure is not
 * filled in.
 *
 * "status" records what ufs_dircheckforname() found:
 *	NONE	name not found, large enough free slot not found
 *	FOUND	name not found, large enough free slot found
 *	EXIST	name found
 *
 * After ufs_dircheckforname() succeeds the values are:
 *	status	offset		size		fbp, ep
 *	------	------		----		-------
 *	NONE	end of dir	needed		not valid
 *	FOUND	start of entry	of ent		both valid if fbp != NULL
 *	EXIST	start of entry	of prev ent	valid
 *
 * "endoff" is set to 0 if an entry with the given name is found, or if no
 * free slot could be found or made; this means that the directory should
 * not be truncated.  If the entry was found, the search terminates so
 * ufs_dircheckforname() didn't find out where the last valid entry in the
 * directory was, so it doesn't know where to cut the directory off; if no
 * free slot could be found or made, the directory has to be extended to
 * make room for the new entry, so there's nothing to cut off.
 * Otherwise, "endoff" is set to the larger of the offset of the last
 * non-empty entry in the directory and the offset at which the new entry
 * will be placed.  This is used by ufs_diraddentry(); if a new entry is to
 * be added to the directory, any complete directory blocks at the end of
 * the directory that contain no non-empty entries are lopped off the end,
 * thus shrinking the directory dynamically.
 */
typedef enum {
	NONE,		/* no name, no usable free slot */
	FOUND,		/* no name, usable free slot located */
	EXIST		/* name located */
} slotstat_t;

struct ufs_slot {
	struct	direct *ep;	/* pointer to slot */
	struct	fbuf *fbp;	/* dir buf where slot is */
	off_t	offset;		/* offset of area with free space */
	off_t	endoff;		/* last useful location found in search */
	slotstat_t status;	/* status of slot */
	int	size;		/* size of area at offset */
	int	cached;		/* cached directory */
};
456
/*
 * Statistics on inodes
 * Not protected by locks
 *
 * Every member is a kstat_named_t counter.
 */
struct instats {
	kstat_named_t in_size;		/* current cache size */
	kstat_named_t in_maxsize;	/* maximum cache size */
	kstat_named_t in_hits;		/* cache hits */
	kstat_named_t in_misses;	/* cache misses */
	kstat_named_t in_malloc;	/* kmem_alloc'd */
	kstat_named_t in_mfree;		/* kmem_free'd */
	kstat_named_t in_maxreached;	/* Largest size reached by cache */
	kstat_named_t in_frfront;	/* # put at front of freelist */
	kstat_named_t in_frback;	/* # put at back of freelist */
	kstat_named_t in_qfree;		/* q's to delete thread */
	kstat_named_t in_scan;		/* # inodes scanned */
	kstat_named_t in_tidles;	/* # inodes idled by idle thread */
	kstat_named_t in_lidles;	/* # inodes idled by ufs_lookup */
	kstat_named_t in_vidles;	/* # inodes idled by ufs_vget */
	kstat_named_t in_kcalloc;	/* # inodes kmem_cache_alloced */
	kstat_named_t in_kcfree;	/* # inodes kmem_cache_freed */
	kstat_named_t in_poc;		/* # push-on-close's */
};
480
481 #ifdef _KERNEL
482
/*
 * Extended attributes
 */

#define	XATTR_DIR_NAME	"/@/"
extern volatile int ufs_ninode;	/* high-water mark for inode cache */

/* Declarations only; the objects are defined outside this header. */
extern struct vnodeops *ufs_vnodeops;	/* vnode operations for ufs */
extern const struct fs_operation_def ufs_vnodeops_template[];
492
/*
 * Convert between inode pointers and vnode pointers
 *
 * VTOI casts the vnode's v_data to a struct inode pointer; ITOV returns
 * the inode's i_vnode.  Each macro evaluates its argument exactly once.
 */
#define	VTOI(VP)	((struct inode *)(VP)->v_data)
#define	ITOV(IP)	((struct vnode *)(IP)->i_vnode)

/*
 * convert to fs
 *
 * ITOF yields the inode's i_fs path cast to a struct fs pointer.
 */
#define	ITOF(IP)	((struct fs *)(IP)->i_fs)
503
/*
 * Convert between vnode types and inode formats
 *
 * Only the inode-format to vnode-type table is declared; the reverse
 * table is compiled out under the never-defined "notneeded" guard.
 */
extern enum vtype	iftovt_tab[];

#ifdef notneeded

/* Look at sys/mode.h and os/vnode.c */

extern int		vttoif_tab[];

#endif
516
/*
 * Mark an inode with the current (unique) timestamp.
 * (Note that UFS's concept of time only keeps 32 bits of seconds
 * in the on-disk format).
 *
 * NOTE(review): iuniqtime is declared here without `extern', so every
 * translation unit including this header emits a tentative definition of
 * it.  That only links when the toolchain merges common symbols; it should
 * become an extern declaration with a single definition in one .c file -
 * confirm where that definition belongs before changing it.
 */
struct timeval32 iuniqtime;
extern kmutex_t ufs_iuniqtime_lock;

/* ITIMES_NOLOCK is a direct call to ufs_itimes_nolock(). */
#define	ITIMES_NOLOCK(ip)	ufs_itimes_nolock(ip)

/*
 * ITIMES performs ITIMES_NOLOCK with the inode's i_tlock held for the
 * duration of the call.  It expands to a brace block, not an expression:
 * it evaluates `ip' three times, and "ITIMES(ip);" cannot be used as the
 * unbraced body of an if that has an else.
 */
#define	ITIMES(ip) { \
	mutex_enter(&(ip)->i_tlock); \
	ITIMES_NOLOCK(ip); \
	mutex_exit(&(ip)->i_tlock); \
}
532
/*
 * The following interfaces are used to do atomic loads and stores
 * of an inode's i_size, which is a long long data type.
 *
 * For LP64, we just do a load or a store - atomicity and alignment
 * are 8-byte guaranteed.  For x86 there are no such instructions,
 * so we grab i_contents as reader to get the size; we already hold
 * it as writer when we're setting the size.
 *
 *	UFS_GET_ISIZE(resultp, ip)	store ip's i_size through resultp
 *	UFS_SET_ISIZE(value, ip)	set ip's i_size to value
 *
 * The LP64 forms are single, fully parenthesized expressions so that they
 * cannot be re-associated by operators at the point of use.  The non-LP64
 * forms are brace blocks (statements); portable callers must therefore
 * use either macro only as a complete statement.
 */

#ifdef _LP64

#define	UFS_GET_ISIZE(resultp, ip)	(*(resultp) = (ip)->i_size)
#define	UFS_SET_ISIZE(value, ip)	((ip)->i_size = (value))

#else	/* _LP64 */

/* Read i_size under the i_contents reader lock. */
#define	UFS_GET_ISIZE(resultp, ip)				\
	{							\
		rw_enter(&(ip)->i_contents, RW_READER);		\
		*(resultp) = (ip)->i_size;			\
		rw_exit(&(ip)->i_contents);			\
	}
/* The caller must already hold i_contents as writer; only asserted. */
#define	UFS_SET_ISIZE(value, ip)				\
	{							\
		ASSERT(RW_WRITE_HELD(&(ip)->i_contents));	\
		(ip)->i_size = (value);				\
	}

#endif	/* _LP64 */
563
/*
 * Allocate the specified block in the inode and make sure any in-core
 * pages are initialized: a bmap_write() call with the BI_NORMAL block
 * initialization type and a NULL fifth argument.  `off' is converted to
 * u_offset_t before the call.
 */
#define	BMAPALLOC(ip, off, size, cr)	\
	bmap_write((ip), (u_offset_t)(off), (size), BI_NORMAL, NULL, (cr))
570
#define	ESAME	(-1)		/* trying to rename linked files (special) */

/*
 * Value used when no block is allocated: -1 converted to daddr32_t.
 * The cast expression is parenthesized as a whole; without that,
 * "sizeof UFS_HOLE" would parse as "sizeof (daddr32_t) - 1".
 */
#define	UFS_HOLE	((daddr32_t)-1)	/* value used when no block allocated */
574
/*
 * enums
 */

/* direnter ops */
enum de_op {
	DE_CREATE	= 0,
	DE_MKDIR	= 1,
	DE_LINK		= 2,
	DE_RENAME	= 3,
	DE_SYMLINK	= 4,
	DE_ATTRDIR	= 5
};

/* dirremove ops */
enum dr_op {
	DR_REMOVE	= 0,
	DR_RMDIR	= 1,
	DR_RENAME	= 2
};

/*
 * block initialization type for bmap_write
 *
 * BI_NORMAL - allocate and zero fill pages in memory
 * BI_ALLOC_ONLY - only allocate the block, do not zero out pages in mem
 * BI_FALLOCATE - allocate only, do not zero out pages, and store as negative
 *                block number in inode block list
 */
enum bi_type {
	BI_NORMAL	= 0,
	BI_ALLOC_ONLY	= 1,
	BI_FALLOCATE	= 2
};
594
/*
 * This overlays the fid structure (see vfs.h)
 *
 * LP64 note: we use int32_t instead of ino_t since UFS does not use
 * inode numbers larger than 32-bits and ufid's are passed to NFS
 * which expects them to not grow in size beyond 10 bytes (12 including
 * the length).
 *
 * The members below add up to that budget: two 16-bit fields followed by
 * two 32-bit fields.
 */
struct ufid {
	ushort_t ufid_len;
	ushort_t ufid_flags;
	int32_t	ufid_ino;
	int32_t	ufid_gen;
};
609
/*
 * each ufs thread (see ufs_thread.c) is managed by this struct
 *
 * The queue head is a union so the same slot can be viewed as a generic
 * pointer, an inode pointer, or a ufs_failure_t pointer; the uq_head,
 * uq_ihead and uq_ufhead macros below select the view.
 */
struct ufs_q {
	union uq_head {
		void		*_uq_generic;	/* first entry on q */
		struct inode	*_uq_i;
		ufs_failure_t	*_uq_uf;
	} _uq_head;
	int		uq_ne;		/* # of entries/failures found */
	int		uq_lowat;	/* thread runs when ne == lowat */
	int		uq_hiwat;	/* synchronous idle if ne >= hiwat */
	ushort_t	uq_flags;	/* flags (see below) */
	kcondvar_t	uq_cv;		/* for sleep/wakeup */
	kthread_id_t	uq_threadp;	/* thread managing this q */
	kmutex_t	uq_mutex;	/* protects this struct */
};

#define	uq_head		_uq_head._uq_generic
#define	uq_ihead	_uq_head._uq_i
#define	uq_ufhead	_uq_head._uq_uf
631
/*
 * uq_flags bits (one bit each)
 */
#define	UQ_EXIT		0x0001	/* q server exits at its convenience */
#define	UQ_WAIT		0x0002	/* thread is waiting on q server */
#define	UQ_SUSPEND	0x0004	/* request for suspension */
#define	UQ_SUSPENDED	0x0008	/* thread has suspended itself */
639
/*
 * When logging is enabled, statvfs must account for blocks and files that
 * may be on the delete queue.  Protected by ufsvfsp->vfs_delete.uq_mutex
 *
 * The two members count the blocks and the files, respectively, that are
 * still unreclaimed.
 */
struct ufs_delq_info {
	u_offset_t	delq_unreclaimed_blocks;
	ulong_t		delq_unreclaimed_files;
};
648
649
/*
 * global idle queues
 * The queues are sized dynamically in proportion to ufs_ninode
 * which, unless overridden, scales with the amount of memory.
 * The idle queue is halved whenever it hits the low water mark
 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number
 * of hash queues is currently maintained to give on average IQHASHQLEN
 * entries when the idle queue is at the low water mark.
 * Note, we do not need to search along the hash queues, but use them
 * in order to batch together geographically local inodes to allow
 * their updates (via the log or buffer cache) to require less disk seeks.
 * This gives an incredible performance boost for logging and a boost for
 * non logging file systems.
 *
 * An iqhead_t is a queue head whose members repeat, in order, the leading
 * members of inode_t; keep the two definitions in step.
 */
typedef struct {
	inode_t *i_chain[2];	/* must match inode_t, but unused */
	inode_t *i_freef;	/* must match inode_t, idle list forward */
	inode_t *i_freeb;	/* must match inode_t, idle list back */
} iqhead_t;
669
/* Idle queue state; declarations only, defined outside this header. */
extern struct ufs_q ufs_idle_q;		/* used by global ufs idle thread */
extern iqhead_t *ufs_junk_iq;		/* junk idle queues */
extern iqhead_t *ufs_useful_iq;		/* useful idle queues */
extern int ufs_njunk_iq;		/* number of entries in junk iq */
extern int ufs_nuseful_iq;		/* number of entries in useful iq */
extern int ufs_niqhash;			/* number of iq hash qs - power of 2 */
extern int ufs_iqhashmask;		/* iq hash mask = ufs_niqhash - 1 */
677
#define	IQHASHQLEN	32	/* average hash queue length at low water */
#define	INOCGSHIFT	7	/* 128 inodes per cylinder group */
/*
 * IQHASH(ip) - idle queue index for an inode: its i_number shifted right
 *	by INOCGSHIFT, masked with ufs_iqhashmask.
 * IQNEXT(i) - index of the idle queue after i, wrapping by means of
 *	ufs_iqhashmask.  The expansion is parenthesized as a whole so the
 *	`&' cannot be captured by a neighbouring operator of higher
 *	precedence (e.g. "IQNEXT(i) == 0").
 */
#define	IQHASH(ip)	(((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask)
#define	IQNEXT(i)	(((i) + 1) & ufs_iqhashmask)	/* next idle queue */
682
extern struct ufs_q	ufs_hlock;	/* used by global ufs hlock thread */

/*
 * vfs_lfflags flags
 */
#define	UFS_LARGEFILES	((ushort_t)0x1)	/* set if mount allows largefiles */

/*
 * vfs_dfritime flags
 */
#define	UFS_DFRATIME	0x1		/* deferred access time */
694
/*
 * UFS VFS private data.
 *
 * UFS file system instances may be linked on several lists.
 *
 * -	The vfs_next field chains together every extant ufs instance; this
 *	list is rooted at ufs_instances and should be used in preference to
 *	the overall vfs list (which is properly the province of the generic
 *	file system code, not of file system implementations).  This same list
 *	link is used during forcible unmounts to chain together instances that
 *	can't yet be completely dismantled.
 *
 * -	The vfs_wnext field is used within ufs_update to form a work list of
 *	UFS instances to be synced out.
 */
typedef struct ufsvfs {
	struct vfs	*vfs_vfs;	/* back link */
	struct ufsvfs	*vfs_next;	/* instance list link */
	struct ufsvfs	*vfs_wnext;	/* work list link */
	struct vnode	*vfs_root;	/* root vnode */
	struct buf	*vfs_bufp;	/* buffer containing superblock */
	struct vnode	*vfs_devvp;	/* block device vnode */
	ushort_t	vfs_lfflags;	/* Large files (set by mount) */
	ushort_t	vfs_qflags;	/* QUOTA: filesystem flags */
	struct inode	*vfs_qinod;	/* QUOTA: pointer to quota file */
	uint_t		vfs_btimelimit;	/* QUOTA: block time limit */
	uint_t		vfs_ftimelimit;	/* QUOTA: file time limit */
	krwlock_t	vfs_dqrwlock;	/* QUOTA: protects quota fields */
	/*
	 * some fs local threads
	 */
	struct ufs_q	vfs_delete;	/* delayed inode delete */
	struct ufs_q	vfs_reclaim;	/* reclaim open, deleted files */

	/*
	 * This is copied from the super block at mount time.
	 */
	int		vfs_nrpos;	/* # rotational positions */
	/*
	 * This lock protects cg's and super block pointed at by
	 * vfs_bufp->b_un.b_fs.  Locks contents of fs and cg's and contents
	 * of vfs_dio.
	 */
	kmutex_t	vfs_lock;
	struct ulockfs	vfs_ulockfs;	/* ufs lockfs support */
	uint_t		vfs_dio;	/* delayed io (_FIODIO) */
	uint_t		vfs_nointr;	/* disallow lockfs interrupts */
	uint_t		vfs_nosetsec;	/* disallow ufs_setsecattr */
	uint_t		vfs_syncdir;	/* synchronous local directory ops */
	uint_t		vfs_dontblock;	/* don't block on forced umount */

	/*
	 * trans (logging ufs) stuff
	 */
	uint_t		vfs_domatamap;	/* set if matamap enabled */
	ulong_t		vfs_maxacl;	/* transaction stuff - max acl size */
	ulong_t		vfs_dirsize;	/* logspace for directory creation */
	ulong_t		vfs_avgbfree;	/* average free blks in cg (blkpref) */
	/*
	 * Some useful constants
	 */
	int	vfs_nindirshift;	/* calc. from fs_nindir */
	int	vfs_nindiroffset;	/* calc. from fs_nindir */
	int	vfs_ioclustsz;		/* bytes in read/write cluster */
	int	vfs_iotransz;		/* max device i/o transfer size */

	vfs_ufsfx_t	vfs_fsfx;	/* lock/fix-on-panic support */
	/*
	 * More useful constants
	 */
	int	vfs_minfrags;		/* calc. from fs_minfree */
	/*
	 * Force DirectIO on all files
	 */
	uint_t	vfs_forcedirectio;
	/*
	 * Deferred inode time related fields
	 */
	clock_t		vfs_iotstamp;	/* last I/O timestamp */
	uint_t		vfs_dfritime;	/* deferred inode time flags */
	/*
	 * Some more useful info
	 */
	dev_t		vfs_dev;	/* device mounted from */
	struct ml_unit	*vfs_log;	/* pointer to embedded log struct */
	uint_t		vfs_noatime;    /* disable inode atime updates */
	/*
	 * snapshot stuff
	 */
	void		*vfs_snapshot;	/* snapshot handle */
	/*
	 *  Controls logging "file system full" messages to messages file
	 */
	clock_t		vfs_lastwhinetime;

	int		vfs_nolog_si;	/* not logging summary info */
	int		vfs_validfs;	/* indicates mounted fs */

	/*
	 * Additional information about vfs_delete above
	 */
	struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */
} ufsvfs_t;

/* Member-path shorthand: the b_un.b_fs of the superblock buffer vfs_bufp. */
#define	vfs_fs	vfs_bufp->b_un.b_fs
800
/*
 * values for vfs_validfs (the ufsvfs field that "indicates mounted fs")
 */
#define	UT_UNMOUNTED	0
#define	UT_MOUNTED	1
#define	UT_HLOCKING	2
807
/*
 * INOHASH(ino) - inode hash index: ino converted to int, masked with
 * (inohsz - 1).  inohsz is guaranteed to be a power of 2.  The argument is
 * parenthesized so the cast applies to the whole argument expression.
 */
#define	INOHASH(ino)	(((int)(ino)) & (inohsz - 1))

/*
 * ISFALLOCBLK(ip, bn) - true when all of the following hold:
 *	- bn is negative,
 *	- bn is a multiple of the file system's fs_frag,
 *	- the inode's cflags have IFALLOCATE set,
 *	- bn is not UFS_HOLE.
 * Both arguments are evaluated more than once.  `ip' is parenthesized at
 * every use so that any pointer expression (e.g. "&inode") is accepted.
 */
#define	ISFALLOCBLK(ip, bn)	\
	(((bn) < 0) && ((bn) % (ip)->i_fs->fs_frag == 0) && \
	(((ip)->i_cflags & IFALLOCATE) && ((bn) != UFS_HOLE)))
814
/*
 * Inode hash chain head: the same two pointer slots viewed either as
 * links to other heads or as links to inodes.
 */
union ihead {
	union	ihead	*ih_head[2];
	struct	inode	*ih_chain[2];
};

/* Declarations only; the objects are defined outside this header. */
extern	union	ihead	*ihead;
extern  kmutex_t	*ih_lock;	/* protects inode hash chain buckets */
extern  int	*ih_ne;
extern	int	inohsz;			/* power of 2; see INOHASH */

extern	clock_t	ufs_iowait;
826
827 #endif /* _KERNEL */
828
829 /*
830 * ufs function prototypes
831 */
832 #if defined(_KERNEL) && !defined(_BOOT)
833
834 extern void ufs_iinit(void);
835 extern int ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *);
836 extern int ufs_iget_alloced(struct vfs *, ino_t, struct inode **,
837 cred_t *);
838 extern void ufs_reset_vnode(vnode_t *);
839 extern void ufs_iinactive(struct inode *);
840 extern void ufs_iupdat(struct inode *, int);
841 extern int ufs_rmidle(struct inode *);
842 extern int ufs_itrunc(struct inode *, u_offset_t, int, cred_t *);
843 extern int ufs_iaccess(struct inode *, int, cred_t *, int);
844 extern int rdip(struct inode *, struct uio *, int, struct cred *);
845 extern int wrip(struct inode *, struct uio *, int, struct cred *);
846
847 extern void ufs_imark(struct inode *);
848 extern void ufs_itimes_nolock(struct inode *);
849
850 extern int ufs_diraccess(struct inode *, int, struct cred *);
851 extern int ufs_dirlook(struct inode *, char *, struct inode **,
852 cred_t *, int, int);
853 extern int ufs_direnter_cm(struct inode *, char *, enum de_op,
854 struct vattr *, struct inode **, cred_t *, int);
855 extern int ufs_direnter_lr(struct inode *, char *, enum de_op,
856 struct inode *, struct inode *, cred_t *);
857 extern int ufs_dircheckpath(ino_t, struct inode *, struct inode *,
858 struct cred *);
859 extern int ufs_dirmakeinode(struct inode *, struct inode **,
860 struct vattr *, enum de_op, cred_t *);
861 extern int ufs_dirremove(struct inode *, char *, struct inode *,
862 vnode_t *, enum dr_op, cred_t *);
863 extern int ufs_dircheckforname(struct inode *, char *, int,
864 struct ufs_slot *, struct inode **, struct cred *, int);
865 extern int ufs_xattrdirempty(struct inode *, ino_t, cred_t *);
866 extern int blkatoff(struct inode *, off_t, char **, struct fbuf **);
867
868 extern void sbupdate(struct vfs *);
869
870 extern int ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **,
871 cred_t *);
872 extern void ufs_ifree(struct inode *, ino_t, mode_t);
873 extern void free(struct inode *, daddr_t, off_t, int);
874 extern int alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *);
875 extern int realloccg(struct inode *, daddr_t, daddr_t, int, int,
876 daddr_t *, cred_t *);
877 extern int ufs_allocsp(struct vnode *, struct flock64 *, cred_t *);
878 extern int ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *);
879 extern ino_t dirpref(inode_t *);
880 extern daddr_t blkpref(struct inode *, daddr_t, int, daddr32_t *);
881 extern daddr_t contigpref(ufsvfs_t *, size_t, size_t);
882
883 extern int ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t,
884 offset_t, enum uio_seg, int *, cred_t *);
885
886 extern int bmap_read(struct inode *, u_offset_t, daddr_t *, int *);
887 extern int bmap_write(struct inode *, u_offset_t, int, enum bi_type,
888 daddr_t *, struct cred *);
889 extern int bmap_has_holes(struct inode *);
890 extern int bmap_find(struct inode *, boolean_t, u_offset_t *);
891 extern int bmap_set_bn(struct vnode *, u_offset_t, daddr32_t);
892
893 extern void ufs_vfs_add(struct ufsvfs *);
894 extern void ufs_vfs_remove(struct ufsvfs *);
895
896 extern void ufs_sbwrite(struct ufsvfs *);
897 extern void ufs_update(int);
898 extern int ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
899 extern int ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
900 extern int ufs_syncip(struct inode *, int, int, top_t);
901 extern int ufs_sync_indir(struct inode *);
902 extern int ufs_indirblk_sync(struct inode *, offset_t);
903 extern int ufs_badblock(struct inode *, daddr_t);
904 extern int ufs_indir_badblock(struct inode *, daddr32_t *);
905 extern void ufs_notclean(struct ufsvfs *);
906 extern void ufs_checkclean(struct vfs *);
907 extern int isblock(struct fs *, uchar_t *, daddr_t);
908 extern void setblock(struct fs *, uchar_t *, daddr_t);
909 extern void clrblock(struct fs *, uchar_t *, daddr_t);
910 extern int isclrblock(struct fs *, uchar_t *, daddr_t);
911 extern void fragacct(struct fs *, int, int32_t *, int);
912 extern int skpc(char, uint_t, char *);
913 extern int ufs_fbwrite(struct fbuf *, struct inode *);
914 extern int ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long);
915 extern int ufs_putapage(struct vnode *, struct page *, u_offset_t *,
916 size_t *, int, struct cred *);
917 extern inode_t *ufs_alloc_inode(ufsvfs_t *, ino_t);
918 extern void ufs_free_inode(inode_t *);
919
/*
 * Special-purpose helpers.  ufs_scan_inodes() takes a callback of the
 * form int (*)(struct inode *, void *) plus an opaque argument;
 * ufs_sync_inode() has exactly that callback signature.
 */
extern void	ufs_setreclaim(struct inode *);
extern int	ufs_scan_inodes(int, int (*)(struct inode *, void *), void *,
    struct ufsvfs *);
extern int	ufs_sync_inode(struct inode *, void *);
extern int	ufs_sticky_remove_access(struct inode *, struct inode *,
    struct cred *);
929 /*
930 * quota
931 */
932 extern int chkiq(struct ufsvfs *, int, struct inode *, uid_t, int,
933 struct cred *, char **errp, size_t *lenp);
934
/*
 * UFS service threads (delete, idle, reclaim, hlock) and the ufs_q
 * queue control routines that manage them.
 *
 * ufs_thread_start() declares its function argument as void (*)(),
 * i.e. without a parameter list, so functions with differing argument
 * lists can be passed to it.
 */
extern void	ufs_thread_delete(struct vfs *);
extern void	ufs_delete_drain(struct vfs *, int, int);
extern void	ufs_delete(struct ufsvfs *, struct inode *, int);
extern void	ufs_inode_cache_reclaim(void *);
extern void	ufs_idle_drain(struct vfs *);
extern void	ufs_idle_some(int);
extern void	ufs_thread_idle(void);
extern void	ufs_thread_reclaim(struct vfs *);
extern void	ufs_thread_init(struct ufs_q *, int);
extern void	ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *);
extern void	ufs_thread_exit(struct ufs_q *);
extern void	ufs_thread_suspend(struct ufs_q *);
extern void	ufs_thread_continue(struct ufs_q *);
extern void	ufs_thread_hlock(void *);
extern void	ufs_delete_init(struct ufsvfs *, int);
extern void	ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *);
extern void	ufs_delete_drain_wait(struct ufsvfs *, int);
955
956 /*
957 * ufs lockfs stuff
958 */
959 struct seg;
960 extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int);
961 extern int ufs_quiesce(struct ulockfs *);
962 extern int ufs_flush(struct vfs *);
963 extern int ufs_fiolfs(struct vnode *, struct lockfs *, int);
964 extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int);
965 extern int ufs_fiolfss(struct vnode *, struct lockfs *);
966 extern int ufs_fioffs(struct vnode *, char *, struct cred *);
967 extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t);
968 extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs **, ulong_t);
969 extern int ufs_lockfs_trybegin(struct ufsvfs *, struct ulockfs **, ulong_t);
970 extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **,
971 struct seg *, int, uint_t *);
972 extern void ufs_lockfs_end(struct ulockfs *);
973 /*
974 * ufs acl stuff
975 */
976 extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *);
977 extern void si_cache_init(void);
978 extern int ufs_si_load(struct inode *, cred_t *);
979 extern void ufs_si_del(struct inode *);
980 extern int ufs_acl_access(struct inode *, int, cred_t *);
981 extern void ufs_si_cache_flush(dev_t);
982 extern int ufs_si_free(si_t *, struct vfs *, cred_t *);
983 extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *);
984 extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *);
985 extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *);
986 /*
987 * ufs directio stuff
988 */
989 extern void ufs_directio_init();
990 extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *,
991 int *);
992 extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *);
993 #define DIRECTIO_FAILURE (0)
994 #define DIRECTIO_SUCCESS (1)
995
996 /*
997 * ufs extensions for PXFS
998 */
999
1000 int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb,
1001 int flags, cred_t *cr);
1002 int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb,
1003 int flags, cred_t *cr);
1004
/*
 * Support for forced unmount.
 */
extern void	ufs_freeze(struct ulockfs *, struct lockfs *);
extern int	ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *);
1011
1012 /*
1013 * extended attributes
1014 */
1015
1016 int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *);
1017 int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *);
1018 void ufs_unhook_shadow(inode_t *, inode_t *);
1019
1020 #endif /* defined(_KERNEL) && !defined(_BOOT) */
1021
1022 #ifdef __cplusplus
1023 }
1024 #endif
1025
1026 #endif /* _SYS_FS_UFS_INODE_H */