Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/sys/buf.h
+++ new/usr/src/uts/common/sys/buf.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2012 Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 32 * The Regents of the University of California
33 33 * All Rights Reserved
34 34 *
35 35 * University Acknowledgment- Portions of this document are derived from
36 36 * software developed by the University of California, Berkeley, and its
37 37 * contributors.
38 38 */
39 39
40 40 #ifndef _SYS_BUF_H
41 41 #define _SYS_BUF_H
42 42
43 43 #include <sys/types32.h>
44 44 #include <sys/t_lock.h>
45 45 #include <sys/kstat.h>
46 46
47 47 #ifdef __cplusplus
48 48 extern "C" {
49 49 #endif
50 50
51 51 /*
52 52 * Each buffer in the pool is usually doubly linked into 2 lists:
53 53 * the device with which it is currently associated (always)
54 54 * and also on a list of blocks available for allocation
55 55 * for other use (usually).
56 56 * The latter list is kept in last-used order, and the two
57 57 * lists are doubly linked to make it easy to remove
58 58 * a buffer from one list when it was found by
59 59 * looking through the other.
60 60 * A buffer is on the available list, and is liable
61 61 * to be reassigned to another disk block, if and only
62 62 * if it is not marked BUSY. When a buffer is busy, the
63 63 * available-list pointers can be used for other purposes.
64 64 * Most drivers use the forward ptr as a link in their I/O active queue.
65 65 * A buffer header contains all the information required to perform I/O.
66 66 * Most of the routines which manipulate these things are in bio.c.
67 67 *
68 68 * There are a number of locks associated with the buffer management
69 69 * system.
70 70 * hbuf.b_lock: protects hash chains, buffer hdr freelists
71 71 * and delayed write freelist
72 72 * bfree_lock: protects the bfreelist structure
73 73 * bhdr_lock: protects the free header list
74 74 * blist_lock: protects b_list fields
75 75 * buf.b_sem: protects all remaining members in the buf struct
76 76 * buf.b_io: I/O synchronization variable
77 77 *
78 78 * A buffer header is never "locked" (b_sem) when it is on
79 79 * a "freelist" (bhdrlist or bfreelist avail lists).
80 80 */
81 81 typedef struct buf {
82 82 int b_flags; /* see defines below */
83 83 struct buf *b_forw; /* headed by d_tab of conf.c */
84 84 struct buf *b_back; /* back pointer paired with b_forw */
85 85 struct buf *av_forw; /* position on free list, */
86 86 struct buf *av_back; /* if not BUSY */
87 87 o_dev_t b_dev; /* OLD major+minor device name */
88 88 size_t b_bcount; /* transfer count */
89 89 union {
90 90 caddr_t b_addr; /* low order core address */
91 91 struct fs *b_fs; /* superblocks */
92 92 struct cg *b_cg; /* UFS cylinder group block */
93 93 struct dinode *b_dino; /* UFS ilist */
94 94 daddr32_t *b_daddr; /* disk blocks */
95 95 } b_un; /* one pointer, viewed as several types */
96 96
97 97 lldaddr_t _b_blkno; /* block # on device (union) */
98 98 #define b_lblkno _b_blkno._f /* always the _f member */
99 99 #ifdef _LP64
100 100 #define b_blkno _b_blkno._f /* LP64: same member as b_lblkno */
101 101 #else
102 102 #define b_blkno _b_blkno._p._l /* !LP64: the _p._l member */
103 103 #endif /* _LP64 */
104 104
105 105 char b_obs1; /* obsolete */
106 106 size_t b_resid; /* words not transferred after error */
107 107 clock_t b_start; /* request start time */
108 108 struct proc *b_proc; /* process doing physical or swap I/O */
109 109 struct page *b_pages; /* page list for PAGEIO */
110 110 clock_t b_obs2; /* obsolete */
111 111 /* Begin new stuff */
112 112 #define b_actf av_forw /* alias for av_forw */
113 113 #define b_actl av_back /* alias for av_back */
114 114 #define b_active b_bcount /* alias for b_bcount */
115 115 #define b_errcnt b_resid /* alias for b_resid */
116 116 size_t b_bufsize; /* size of allocated buffer */
117 117 int (*b_iodone)(struct buf *); /* function called by iodone */
118 118 struct vnode *b_vp; /* vnode associated with block */
119 119 struct buf *b_chain; /* chain together all buffers here */
120 120 int b_obs3; /* obsolete */
121 121 int b_error; /* expanded error field */
122 122 void *b_private; /* "opaque" driver private area */
123 123 dev_t b_edev; /* expanded dev field */
124 124 ksema_t b_sem; /* Exclusive access to buf */
125 125 ksema_t b_io; /* I/O Synchronization */
126 126 struct buf *b_list; /* List of potential B_DELWRI bufs */
127 127 struct page **b_shadow; /* shadow page list */
128 128 void *b_dip; /* device info pointer */
129 129 struct vnode *b_file; /* file associated with this buffer */
130 130 offset_t b_offset; /* offset in file assoc. with buffer */
131 131 } buf_t;
132 132
133 133 /*
134 134 * Bufhd structures used at the head of the hashed buffer queues.
135 135 * We only need seven words for this, so this abbreviated
136 136 * definition saves some space.
137 137 */
138 138 struct diskhd {
139 139 int b_flags; /* not used; same leading field as struct buf */
140 140 struct buf *b_forw, *b_back; /* queue of unit queues */
141 141 struct buf *av_forw, *av_back; /* queue of bufs for this unit */
142 142 o_dev_t b_dev; /* OLD major+minor device name */
143 143 size_t b_bcount; /* transfer count */
144 144 };
145 145
146 146
147 147 /*
148 148 * Statistics on the buffer cache
149 149 */
150 150 struct biostats {
151 151 kstat_named_t bio_lookup; /* requests to assign buffer */
152 152 kstat_named_t bio_hit; /* buffer already associated with blk */
153 153 kstat_named_t bio_bufwant; /* NOSLEEP kmem_allocs that failed for a new buf */
154 154 kstat_named_t bio_bufwait; /* kmem_allocs with KM_SLEEP for buf */
155 155 kstat_named_t bio_bufbusy; /* buffer locked by someone else */
156 156 kstat_named_t bio_bufdup; /* duplicate buffer found for block */
157 157 };
158 158
159 159 /*
160 160 * These flags are kept in b_flags.
161 161 * The first group is part of the DDI
162 162 */
163 163 #define B_BUSY 0x0001 /* not on av_forw/back list */
164 164 #define B_DONE 0x0002 /* transaction finished */
165 165 #define B_ERROR 0x0004 /* transaction aborted */
166 166 #define B_PAGEIO 0x0010 /* do I/O to pages on bp->b_pages */
167 167 #define B_PHYS 0x0020 /* Physical IO potentially using UNIBUS map */
168 168 #define B_READ 0x0040 /* read when I/O occurs */
169 169 #define B_WRITE 0x0100 /* non-read pseudo-flag */
170 170
171 171 /* Not part of the DDI */
172 172 #define B_WANTED 0x0080 /* issue wakeup when BUSY goes off */
173 173 #define B_AGE 0x000200 /* delayed write for correct aging */
174 174 #define B_ASYNC 0x000400 /* don't wait for I/O completion */
175 175 #define B_DELWRI 0x000800 /* delayed write-wait til buf needed */
176 176 #define B_STALE 0x001000 /* on av_* list; invalid contents */
177 177 #define B_DONTNEED 0x002000 /* after write, need not be cached */
178 178 #define B_REMAPPED 0x004000 /* buffer is kernel addressable */
179 179 #define B_FREE 0x008000 /* free page when done */
180 180 #define B_INVAL 0x010000 /* destroy page when done */
181 181 #define B_FORCE 0x020000 /* semi-permanent removal from cache */
182 182 #define B_NOCACHE 0x080000 /* don't cache block when released */
183 183 #define B_TRUNC 0x100000 /* truncate page without I/O */
184 184 #define B_SHADOW 0x200000 /* is b_shadow field valid? */
185 185 #define B_RETRYWRI 0x400000 /* retry write til works or bfinval */
186 186 #define B_FAILFAST 0x1000000 /* Fail promptly if device goes away */
187 187 #define B_STARTED 0x2000000 /* io:::start probe called for buf */
188 188 #define B_ABRWRITE 0x4000000 /* Application based recovery active */
189 189 #define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
190 190 #define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */
191 191
192 192 /*
193 193 * There is some confusion over the meaning of B_FREE and B_INVAL and what
194 194 * the use of one over the other implies.
195 195 *
196 196 * In both cases, when we are done with the page (buffer) we want to free
197 197 * up the page. In the case of B_FREE, the page will go to the cachelist.
198 198 * In the case of B_INVAL, the page will be destroyed (hashed out of its
199 199 * vnode) and placed on the freelist. Beyond this, there is no difference
200 200 * between the sole use of these two flags. In both cases, IO will be done
201 201 * if the page is not yet committed to storage.
202 202 *
203 203 * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is
204 204 * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no
205 205 * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then
206 206 * the mapping for the page is only invalidated for the current process.
207 207 * In this case, the page is not destroyed unless this was the final mapping.
208 208 *
209 209 * In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
210 210 * should be used.
211 211 *
212 212 * Use (B_INVAL | B_FORCE) to force the page to be destroyed even if we
213 213 * could not successfully write out the page.
214 214 */
215 215
216 216 /*
217 217 * Insq/Remq for the buffer hash lists.
218 218 */
219 219 #define bremhash(bp) { /* unlink bp from its hash chain */ \
220 220 ASSERT((bp)->b_forw != NULL); \
221 221 ASSERT((bp)->b_back != NULL); \
222 222 (bp)->b_back->b_forw = (bp)->b_forw; /* predecessor now skips bp */ \
223 223 (bp)->b_forw->b_back = (bp)->b_back; /* successor now skips bp */ \
224 224 (bp)->b_forw = (bp)->b_back = NULL; /* NULL links mark bp as off-list */ \
225 225 }
226 226 #define binshash(bp, dp) { /* insert bp directly after list head dp */ \
227 227 ASSERT((bp)->b_forw == NULL); /* bp must not already be linked */ \
228 228 ASSERT((bp)->b_back == NULL); \
229 229 ASSERT((dp)->b_forw != NULL); /* dp must be an initialized list head */ \
230 230 ASSERT((dp)->b_back != NULL); \
231 231 (bp)->b_forw = (dp)->b_forw; /* bp goes in front of the old first entry */ \
232 232 (bp)->b_back = (dp); \
233 233 (dp)->b_forw->b_back = (bp); /* must run before dp->b_forw is overwritten */ \
234 234 (dp)->b_forw = (bp); \
235 235 }
236 236
237 237
238 238 /*
239 239 * The hash structure maintains two lists:
240 240 *
241 241 * 1) The hash list of buffers (b_forw & b_back)
242 242 * 2) The LRU free list of buffers on this hash bucket (av_forw & av_back)
243 243 *
244 244 * The dwbuf structure keeps a list of delayed write buffers per hash bucket
245 245 * hence there are exactly the same number of dwbuf structures as there are
246 246 * the hash buckets (hbuf structures) in the system.
247 247 *
248 248 * The number of buffers on the freelist may not be equal to the number of
249 249 * buffers on the hash list. That is because when buffers are busy they are
250 250 * taken off the freelist but not off the hash list. "b_length" field keeps
251 251 * track of the number of free buffers (including delayed writes ones) on
252 252 * the hash bucket. The "b_lock" mutex protects the free list as well as
253 253 * the hash list. It also protects the counter "b_length".
254 254 *
255 255 * Entries b_forw, b_back, av_forw & av_back must be at the same offset
256 256 * as the ones in buf structure.
257 257 */
258 258 struct hbuf {
259 259 int b_flags; /* keeps the pointers below at struct buf offsets */
260 260
261 261 struct buf *b_forw; /* hash list forw pointer */
262 262 struct buf *b_back; /* hash list back pointer */
263 263
264 264 struct buf *av_forw; /* free list forw pointer */
265 265 struct buf *av_back; /* free list back pointer */
266 266
267 267 int b_length; /* # of entries on free list */
268 268 kmutex_t b_lock; /* lock to protect this structure */
269 269 };
270 270
271 271
272 272 /*
273 273 * The delayed list pointer entries should match with the buf structure.
274 274 */
275 275 struct dwbuf {
276 276 int b_flags; /* not used; keeps field order of struct buf */
277 277
278 278 struct buf *b_forw; /* not used; keeps field order of struct buf */
279 279 struct buf *b_back; /* not used; keeps field order of struct buf */
280 280
281 281 struct buf *av_forw; /* delayed write forw pointer */
282 282 struct buf *av_back; /* delayed write back pointer */
283 283 };
284 284
285 285
286 286 /*
287 287 * Unlink a buffer from the available (free or delayed write) list and mark
288 288 * it busy (internal interface).
289 289 */
290 290 #define notavail(bp) /* bp: buffer on an av_forw/av_back list, b_sem held */ \
291 291 {\
292 292 ASSERT(SEMA_HELD(&(bp)->b_sem)); /* (bp) parenthesized so any pointer expression is accepted */ \
293 293 ASSERT((bp)->av_forw != NULL); /* must currently be linked ... */ \
294 294 ASSERT((bp)->av_back != NULL); \
295 295 ASSERT((bp)->av_forw != (bp)); /* ... and must not be linked to itself */ \
296 296 ASSERT((bp)->av_back != (bp)); \
297 297 (bp)->av_back->av_forw = (bp)->av_forw; /* neighbours now skip bp */ \
298 298 (bp)->av_forw->av_back = (bp)->av_back; \
299 299 (bp)->b_flags |= B_BUSY; /* mark busy once it is off the list */ \
300 300 (bp)->av_forw = (bp)->av_back = NULL; /* links are free for other use while busy */ \
301 301 }
302 302
303 303 #if defined(_KERNEL)
304 304 /*
305 305 * Macros to avoid the extra function call needed for binary compat.
306 306 *
307 307 * B_RETRYWRI is not included in clear_flags for BWRITE(), BWRITE2(),
308 308 * or brwrite() so that the retry operation is persistent until the
309 309 * write either succeeds or the buffer is bfinval()'d.
310 310 *
311 311 */
312 312 #define BREAD(dev, blkno, bsize) /* bread_common() with no ufsvfsp */ \
313 313 bread_common(NULL /* ufsvfsp */, (dev), (blkno), (bsize))
314 314
315 315 #define BWRITE(bp) /* bwrite_common(): no ufsvfsp, no forced wait, release buf */ \
316 316 bwrite_common(NULL /* ufsvfsp */, (bp), 0 /* force_wait */, \
317 317 1 /* do_relse */, \
318 318 (B_READ | B_DONE | B_ERROR | B_DELWRI) /* clear_flags */)
319 319
320 320 #define BWRITE2(bp) /* bwrite_common(): no ufsvfsp, forced wait, buf not released */ \
321 321 bwrite_common(NULL /* ufsvfsp */, (bp), 1 /* force_wait */, \
322 322 0 /* do_relse */, \
323 323 (B_READ | B_DONE | B_ERROR | B_DELWRI) /* clear_flags */)
324 324
325 325 #define GETBLK(dev, blkno, bsize) /* getblk_common() with no ufsvfsp, errflg 0 */ \
326 326 getblk_common(NULL /* ufsvfsp */, (dev), (blkno), (bsize), 0 /* errflg */)
327 327
328 328
329 329 /*
330 330 * Macros for new retry write interfaces.
331 331 */
332 332
333 333 /*
334 334 * bdwrite() variant: flags the buffer B_RETRYWRI first so that failed writes are retried.
335 335 */
336 336 #define bdrwrite(bp) { \
337 337 (bp)->b_flags |= B_RETRYWRI; /* request retry-on-failure */ \
338 338 bdwrite(bp); \
339 339 }
340 340
341 341 /*
342 342 * Same as bwrite() except write failures are retried: B_RETRYWRI is set and is deliberately absent from clear_flags. A NULL ufsvfsp is passed, as in BWRITE().
343 343 */
344 344 #define brwrite(bp) { \
345 345 (bp)->b_flags |= B_RETRYWRI; \
346 346 bwrite_common(/* ufsvfsp */ NULL, (bp), /* force_wait */ 0, /* do_relse */ 1, \
347 347 /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI)); \
348 348 }
349 349
350 350 extern struct hbuf *hbuf; /* Hash table */
351 351 extern struct dwbuf *dwbuf; /* delayed write hash table */
352 352 extern struct buf *buf; /* The buffer pool itself */
353 353 extern struct buf bfreelist; /* head of available list */
354 354
355 355 extern void (*bio_lufs_strategy)(void *, buf_t *); /* UFS Logging: strategy function pointer */
356 356 extern void (*bio_snapshot_strategy)(void *, buf_t *); /* UFS snapshots: strategy function pointer */
357 357
358 358 int bcheck(dev_t, struct buf *);
359 359 int iowait(struct buf *);
360 360 int hash2ints(int x, int y);
361 361 int bio_busy(int);
362 362 int biowait(struct buf *);
363 363 int biomodified(struct buf *);
364 364 int geterror(struct buf *);
365 365 void minphys(struct buf *);
366 366 /*
367 367 * ufsvfsp is declared as a void * to avoid having everyone that uses
368 368 * this header file include sys/fs/ufs_inode.h.
369 369 */
370 370 void bwrite_common(void *ufsvfsp, struct buf *, int force_wait,
371 371 int do_relse, int clear_flags); /* target of BWRITE(), BWRITE2(), brwrite() */
372 372 void bwrite(struct buf *);
373 373 void bwrite2(struct buf *);
374 374 void bdwrite(struct buf *);
375 375 void bawrite(struct buf *);
376 376 void brelse(struct buf *);
377 377 void iodone(struct buf *);
378 378 void clrbuf(struct buf *);
379 379 void bflush(dev_t);
380 380 void blkflush(dev_t, daddr_t);
381 381 void binval(dev_t);
382 382 int bfinval(dev_t, int);
383 383 void binit(void);
384 384 void biodone(struct buf *);
385 385 void bioinit(struct buf *);
386 386 void biofini(struct buf *);
387 387 void bp_mapin(struct buf *);
388 388 void *bp_mapin_common(struct buf *, int);
389 389 void bp_mapout(struct buf *);
390 390 int bp_copyin(struct buf *, void *, offset_t, size_t);
391 391 int bp_copyout(void *, struct buf *, offset_t, size_t);
392 392 void bp_init(size_t, uint_t);
393 393 int bp_color(struct buf *);
394 394 void pageio_done(struct buf *);
395 395 struct buf *bread(dev_t, daddr_t, long);
396 396 struct buf *bread_common(void *, dev_t, daddr_t, long); /* void * is ufsvfsp; see BREAD() */
397 397 struct buf *breada(dev_t, daddr_t, daddr_t, long);
398 398 struct buf *getblk(dev_t, daddr_t, long);
399 399 struct buf *getblk_common(void *, dev_t, daddr_t, long, int); /* void * is ufsvfsp, int is errflg; see GETBLK() */
400 400 struct buf *ngeteblk(long);
401 401 struct buf *geteblk(void);
402 402 struct buf *pageio_setup(struct page *, size_t, struct vnode *, int);
403 403 void bioerror(struct buf *bp, int error);
404 404 void bioreset(struct buf *bp);
405 405 struct buf *bioclone(struct buf *, off_t, size_t, dev_t, daddr_t,
406 406 int (*)(struct buf *), struct buf *, int);
407 407 size_t biosize(void);
408 408 #endif /* defined(_KERNEL) */
409 409
410 410 #ifdef __cplusplus
411 411 }
412 412 #endif
413 413
414 414 #endif /* _SYS_BUF_H */
|
↓ open down ↓ |
414 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX