re #13613 rb4516 Tunables needs volatile keyword
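The hunks reproduced below add a Nexenta copyright notice and drop the file-local "extern int maxphys;" declaration (presumably in favor of the central declaration of that tunable); the volatile qualifiers that give the change its name land in parts of the webrev not shown in this excerpt. The motivation is that tunables such as lufs_num_roll_bufs, logmap_maxnme, and trans_roll_tics are meant to be changed at run time, from /etc/system or with a kernel debugger, and without volatile the compiler may assume they never change and cache or constant-fold reads. A minimal sketch of the intended style, illustrative only and not a hunk from this webrev:

/*
 * Illustrative sketch (not part of this webrev): marking a tunable
 * volatile forces every read to go to memory, so a value patched at
 * run time, e.g. with "mdb -kw" or an /etc/system "set" directive
 * (assuming the usual conventions, something like
 * "set ufs:lufs_num_roll_bufs = 32"), is observed by callers such as
 * log_roll_buffers().
 */
volatile uint32_t lufs_num_roll_bufs = LUFS_DEFAULT_NUM_ROLL_BUFS;
volatile uint32_t lufs_min_roll_bufs = LUFS_DEFAULT_MIN_ROLL_BUFS;
volatile uint32_t lufs_max_roll_bufs = LUFS_DEFAULT_MAX_ROLL_BUFS;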
--- old/usr/src/uts/common/fs/ufs/lufs_thread.c
+++ new/usr/src/uts/common/fs/ufs/lufs_thread.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 + */
25 28
26 29 #include <sys/systm.h>
27 30 #include <sys/types.h>
28 31 #include <sys/vnode.h>
29 32 #include <sys/errno.h>
30 33 #include <sys/sysmacros.h>
31 34 #include <sys/debug.h>
32 35 #include <sys/kmem.h>
33 36 #include <sys/conf.h>
34 37 #include <sys/proc.h>
35 38 #include <sys/cmn_err.h>
36 39 #include <sys/fssnap_if.h>
37 40 #include <sys/fs/ufs_inode.h>
38 41 #include <sys/fs/ufs_filio.h>
39 42 #include <sys/fs/ufs_log.h>
40 43 #include <sys/fs/ufs_bio.h>
41 44 #include <sys/inttypes.h>
42 45 #include <sys/callb.h>
43 46 #include <sys/tnf_probe.h>
44 47
45 48 /*
46 49 * Kernel threads for logging
47 50 * Currently only one for rolling the log (one per log).
48 51 */
49 52
50 53 #define LUFS_DEFAULT_NUM_ROLL_BUFS 16
51 54 #define LUFS_DEFAULT_MIN_ROLL_BUFS 4
52 55 #define LUFS_DEFAULT_MAX_ROLL_BUFS 64
53 56
54 57 /*
55 58 * Macros
56 59 */
57 60 #define logmap_need_roll(logmap) ((logmap)->mtm_nme > logmap_maxnme)
58 61 #define ldl_empty(ul) ((ul)->un_head_lof == (ul)->un_tail_lof)
59 62
60 63 /*
61 64 * Tunables
62 65 */
63 66 uint32_t lufs_num_roll_bufs = LUFS_DEFAULT_NUM_ROLL_BUFS;
64 67 uint32_t lufs_min_roll_bufs = LUFS_DEFAULT_MIN_ROLL_BUFS;
65 68 uint32_t lufs_max_roll_bufs = LUFS_DEFAULT_MAX_ROLL_BUFS;
66 69 long logmap_maxnme = 1536;
67 70 int trans_roll_tics = 0;
68 71 uint64_t trans_roll_new_delta = 0;
69 72 uint64_t lrr_wait = 0;
70 73 /*
71 74 * Key for thread specific data for the roll thread to
72 75 * bypass snapshot throttling
73 76 */
74 77 uint_t bypass_snapshot_throttle_key;
75 78
76 79 /*
77 80 * externs
78 81 */
79 82 extern kmutex_t ml_scan;
80 83 extern kcondvar_t ml_scan_cv;
81 -extern int maxphys;
82 84
83 85 static void
84 86 trans_roll_wait(mt_map_t *logmap, callb_cpr_t *cprinfop)
85 87 {
86 88 mutex_enter(&logmap->mtm_mutex);
87 89 logmap->mtm_ref = 0;
88 90 if (logmap->mtm_flags & MTM_FORCE_ROLL) {
89 91 cv_broadcast(&logmap->mtm_from_roll_cv);
90 92 }
91 93 logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLLING);
92 94 CALLB_CPR_SAFE_BEGIN(cprinfop);
93 95 (void) cv_reltimedwait(&logmap->mtm_to_roll_cv, &logmap->mtm_mutex,
94 96 trans_roll_tics, TR_CLOCK_TICK);
95 97 CALLB_CPR_SAFE_END(cprinfop, &logmap->mtm_mutex);
96 98 logmap->mtm_flags |= MTM_ROLLING;
97 99 mutex_exit(&logmap->mtm_mutex);
98 100 }
99 101
100 102 /*
101 103 * returns the number of 8K buffers to use for rolling the log
102 104 */
103 105 static uint32_t
104 106 log_roll_buffers()
105 107 {
106 108 /*
107 109 * sanity validate the tunable lufs_num_roll_bufs
108 110 */
109 111 if (lufs_num_roll_bufs < lufs_min_roll_bufs) {
110 112 return (lufs_min_roll_bufs);
111 113 }
112 114 if (lufs_num_roll_bufs > lufs_max_roll_bufs) {
113 115 return (lufs_max_roll_bufs);
114 116 }
115 117 return (lufs_num_roll_bufs);
116 118 }
117 119
118 120 /*
119 121 * Find something to roll, then if we don't have cached roll buffers
120 122 * covering all the deltas in that MAPBLOCK then read the master
121 123 * and overlay the deltas.
 122 124 * returns:
 123 125 * 0 if successful
124 126 * 1 on finding nothing to roll
125 127 * 2 on error
126 128 */
127 129 int
128 130 log_roll_read(ml_unit_t *ul, rollbuf_t *rbs, int nmblk, caddr_t roll_bufs,
129 131 int *retnbuf)
130 132 {
131 133 offset_t mof;
132 134 buf_t *bp;
133 135 rollbuf_t *rbp;
134 136 mt_map_t *logmap = ul->un_logmap;
135 137 daddr_t mblkno;
136 138 int i;
137 139 int error;
138 140 int nbuf;
139 141
140 142 /*
141 143 * Make sure there is really something to roll
142 144 */
143 145 mof = 0;
144 146 if (!logmap_next_roll(logmap, &mof)) {
145 147 return (1);
146 148 }
147 149
148 150 /*
149 151 * build some master blocks + deltas to roll forward
150 152 */
151 153 rw_enter(&logmap->mtm_rwlock, RW_READER);
152 154 nbuf = 0;
153 155 do {
154 156 mof = mof & (offset_t)MAPBLOCKMASK;
155 157 mblkno = lbtodb(mof);
156 158
157 159 /*
158 160 * Check for the case of a new delta to a set up buffer
159 161 */
160 162 for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
161 163 if (P2ALIGN(rbp->rb_bh.b_blkno,
162 164 MAPBLOCKSIZE / DEV_BSIZE) == mblkno) {
163 165 TNF_PROBE_0(trans_roll_new_delta, "lufs",
164 166 /* CSTYLED */);
165 167 trans_roll_new_delta++;
166 168 /* Flush out the current set of buffers */
167 169 goto flush_bufs;
168 170 }
169 171 }
170 172
171 173 /*
172 174 * Work out what to roll next. If it isn't cached then read
173 175 * it asynchronously from the master.
174 176 */
175 177 bp = &rbp->rb_bh;
176 178 bp->b_blkno = mblkno;
177 179 bp->b_flags = B_READ;
178 180 bp->b_un.b_addr = roll_bufs + (nbuf << MAPBLOCKSHIFT);
179 181 bp->b_bufsize = MAPBLOCKSIZE;
180 182 if (top_read_roll(rbp, ul)) {
181 183 /* logmap deltas were in use */
182 184 if (nbuf == 0) {
183 185 /*
184 186 * On first buffer wait for the logmap user
185 187 * to finish by grabbing the logmap lock
186 188 * exclusively rather than spinning
187 189 */
188 190 rw_exit(&logmap->mtm_rwlock);
189 191 lrr_wait++;
190 192 rw_enter(&logmap->mtm_rwlock, RW_WRITER);
191 193 rw_exit(&logmap->mtm_rwlock);
192 194 return (1);
193 195 }
194 196 /* we have at least one buffer - flush it */
195 197 goto flush_bufs;
196 198 }
197 199 if ((bp->b_flags & B_INVAL) == 0) {
198 200 nbuf++;
199 201 }
200 202 mof += MAPBLOCKSIZE;
201 203 } while ((nbuf < nmblk) && logmap_next_roll(logmap, &mof));
202 204
203 205 /*
204 206 * If there was nothing to roll cycle back
205 207 */
206 208 if (nbuf == 0) {
207 209 rw_exit(&logmap->mtm_rwlock);
208 210 return (1);
209 211 }
210 212
211 213 flush_bufs:
212 214 /*
213 215 * For each buffer, if it isn't cached then wait for the read to
214 216 * finish and overlay the deltas.
215 217 */
216 218 for (error = 0, i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
217 219 if (!rbp->rb_crb) {
218 220 bp = &rbp->rb_bh;
219 221 if (trans_not_wait(bp)) {
220 222 ldl_seterror(ul,
221 223 "Error reading master during ufs log roll");
222 224 error = 1;
223 225 }
224 226 /*
225 227 * sync read the data from the log
226 228 */
227 229 if (ldl_read(ul, bp->b_un.b_addr,
228 230 ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK,
229 231 MAPBLOCKSIZE, rbp->rb_age)) {
230 232 error = 1;
231 233 }
232 234 }
233 235
234 236 /*
235 237 * reset the age bit in the age list
236 238 */
237 239 logmap_list_put_roll(logmap, rbp->rb_age);
238 240
239 241 if (ul->un_flags & LDL_ERROR) {
240 242 error = 1;
241 243 }
242 244 }
243 245 rw_exit(&logmap->mtm_rwlock);
244 246 if (error)
245 247 return (2);
246 248 *retnbuf = nbuf;
247 249 return (0);
248 250 }
249 251
250 252 /*
251 253 * Write out a cached roll buffer
252 254 */
253 255 void
254 256 log_roll_write_crb(ufsvfs_t *ufsvfsp, rollbuf_t *rbp)
255 257 {
256 258 crb_t *crb = rbp->rb_crb;
257 259 buf_t *bp = &rbp->rb_bh;
258 260
259 261 bp->b_blkno = lbtodb(crb->c_mof);
260 262 bp->b_un.b_addr = crb->c_buf;
261 263 bp->b_bcount = crb->c_nb;
262 264 bp->b_bufsize = crb->c_nb;
263 265 ASSERT((crb->c_nb & DEV_BMASK) == 0);
264 266 bp->b_flags = B_WRITE;
265 267 logstats.ls_rwrites.value.ui64++;
266 268
267 269 /* if snapshots are enabled, call it */
268 270 if (ufsvfsp->vfs_snapshot) {
269 271 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
270 272 } else {
271 273 (void) bdev_strategy(bp);
272 274 }
273 275 }
274 276
275 277 /*
276 278 * Write out a set of non cached roll buffers
277 279 */
278 280 void
279 281 log_roll_write_bufs(ufsvfs_t *ufsvfsp, rollbuf_t *rbp)
280 282 {
281 283 buf_t *bp = &rbp->rb_bh;
282 284 buf_t *bp2;
283 285 rbsecmap_t secmap = rbp->rb_secmap;
284 286 int j, k;
285 287
286 288 ASSERT(secmap);
287 289 ASSERT((bp->b_flags & B_INVAL) == 0);
288 290
289 291 do { /* for each contiguous block of sectors */
290 292 /* find start of next sector to write */
291 293 for (j = 0; j < 16; ++j) {
292 294 if (secmap & UINT16_C(1))
293 295 break;
294 296 secmap >>= 1;
295 297 }
296 298 bp->b_un.b_addr += (j << DEV_BSHIFT);
297 299 bp->b_blkno += j;
298 300
299 301 /* calculate number of sectors */
300 302 secmap >>= 1;
301 303 j++;
302 304 for (k = 1; j < 16; ++j) {
303 305 if ((secmap & UINT16_C(1)) == 0)
304 306 break;
305 307 secmap >>= 1;
306 308 k++;
307 309 }
308 310 bp->b_bcount = k << DEV_BSHIFT;
309 311 bp->b_flags = B_WRITE;
310 312 logstats.ls_rwrites.value.ui64++;
311 313
312 314 /* if snapshots are enabled, call it */
313 315 if (ufsvfsp->vfs_snapshot)
314 316 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
315 317 else
316 318 (void) bdev_strategy(bp);
317 319 if (secmap) {
318 320 /*
319 321 * Allocate another buf_t to handle
320 322 * the next write in this MAPBLOCK
321 323 * Chain them via b_list.
322 324 */
323 325 bp2 = kmem_alloc(sizeof (buf_t), KM_SLEEP);
324 326 bp->b_list = bp2;
325 327 bioinit(bp2);
326 328 bp2->b_iodone = trans_not_done;
327 329 bp2->b_bufsize = MAPBLOCKSIZE;
328 330 bp2->b_edev = bp->b_edev;
329 331 bp2->b_un.b_addr =
330 332 bp->b_un.b_addr + bp->b_bcount;
331 333 bp2->b_blkno = bp->b_blkno + k;
332 334 bp = bp2;
333 335 }
334 336 } while (secmap);
335 337 }
336 338
337 339 /*
338 340 * Asynchronously roll the deltas, using the sector map
339 341 * in each rollbuf_t.
340 342 */
341 343 int
342 344 log_roll_write(ml_unit_t *ul, rollbuf_t *rbs, int nbuf)
343 345 {
344 346
345 347 ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
346 348 rollbuf_t *rbp;
347 349 buf_t *bp, *bp2;
348 350 rollbuf_t *head, *prev, *rbp2;
349 351
350 352 /*
351 353 * Order the buffers by blkno
352 354 */
353 355 ASSERT(nbuf > 0);
354 356 #ifdef lint
355 357 prev = rbs;
356 358 #endif
357 359 for (head = rbs, rbp = rbs + 1; rbp < rbs + nbuf; rbp++) {
358 360 for (rbp2 = head; rbp2; prev = rbp2, rbp2 = rbp2->rb_next) {
359 361 if (rbp->rb_bh.b_blkno < rbp2->rb_bh.b_blkno) {
360 362 if (rbp2 == head) {
361 363 rbp->rb_next = head;
362 364 head = rbp;
363 365 } else {
364 366 prev->rb_next = rbp;
365 367 rbp->rb_next = rbp2;
366 368 }
367 369 break;
368 370 }
369 371 }
370 372 if (rbp2 == NULL) {
371 373 prev->rb_next = rbp;
372 374 rbp->rb_next = NULL;
373 375 }
374 376 }
375 377
376 378 /*
377 379 * issue the in-order writes
378 380 */
379 381 for (rbp = head; rbp; rbp = rbp2) {
380 382 if (rbp->rb_crb) {
381 383 log_roll_write_crb(ufsvfsp, rbp);
382 384 } else {
383 385 log_roll_write_bufs(ufsvfsp, rbp);
384 386 }
385 387 /* null out the rb_next link for next set of rolling */
386 388 rbp2 = rbp->rb_next;
387 389 rbp->rb_next = NULL;
388 390 }
389 391
390 392 /*
391 393 * wait for all the writes to finish
392 394 */
393 395 for (rbp = rbs; rbp < rbs + nbuf; rbp++) {
394 396 bp = &rbp->rb_bh;
395 397 if (trans_not_wait(bp)) {
396 398 ldl_seterror(ul,
397 399 "Error writing master during ufs log roll");
398 400 }
399 401
400 402 /*
401 403 * Now wait for all the "cloned" buffer writes (if any)
402 404 * and free those headers
403 405 */
404 406 bp2 = bp->b_list;
405 407 bp->b_list = NULL;
406 408 while (bp2) {
407 409 if (trans_not_wait(bp2)) {
408 410 ldl_seterror(ul,
409 411 "Error writing master during ufs log roll");
410 412 }
411 413 bp = bp2;
412 414 bp2 = bp2->b_list;
413 415 kmem_free(bp, sizeof (buf_t));
414 416 }
415 417 }
416 418
417 419 if (ul->un_flags & LDL_ERROR)
418 420 return (1);
419 421 return (0);
420 422 }
421 423
422 424 void
423 425 trans_roll(ml_unit_t *ul)
424 426 {
425 427 callb_cpr_t cprinfo;
426 428 mt_map_t *logmap = ul->un_logmap;
427 429 rollbuf_t *rbs;
428 430 rollbuf_t *rbp;
429 431 buf_t *bp;
430 432 caddr_t roll_bufs;
431 433 uint32_t nmblk;
432 434 int i;
433 435 int doingforceroll;
434 436 int nbuf;
435 437
436 438 CALLB_CPR_INIT(&cprinfo, &logmap->mtm_mutex, callb_generic_cpr,
437 439 "trans_roll");
438 440
439 441 /*
440 442 * We do not want the roll thread's writes to be
441 443 * throttled by the snapshot.
442 444 * If they are throttled then we can have a deadlock
443 445 * between the roll thread and the snapshot taskq thread:
444 446 * roll thread wants the throttling semaphore and
445 447 * the snapshot taskq thread cannot release the semaphore
446 448 * because it is writing to the log and the log is full.
447 449 */
448 450
449 451 (void) tsd_set(bypass_snapshot_throttle_key, (void*)1);
450 452
451 453 /*
452 454 * setup some roll parameters
453 455 */
454 456 if (trans_roll_tics == 0)
455 457 trans_roll_tics = 5 * hz;
456 458 nmblk = log_roll_buffers();
457 459
458 460 /*
459 461 * allocate the buffers and buffer headers
460 462 */
461 463 roll_bufs = kmem_alloc(nmblk * MAPBLOCKSIZE, KM_SLEEP);
462 464 rbs = kmem_alloc(nmblk * sizeof (rollbuf_t), KM_SLEEP);
463 465
464 466 /*
465 467 * initialize the buffer headers
466 468 */
467 469 for (i = 0, rbp = rbs; i < nmblk; ++i, ++rbp) {
468 470 rbp->rb_next = NULL;
469 471 bp = &rbp->rb_bh;
470 472 bioinit(bp);
471 473 bp->b_edev = ul->un_dev;
472 474 bp->b_iodone = trans_not_done;
473 475 bp->b_bufsize = MAPBLOCKSIZE;
474 476 }
475 477
476 478 doingforceroll = 0;
477 479
478 480 again:
479 481 /*
480 482 * LOOP FOREVER
481 483 */
482 484
483 485 /*
484 486 * exit on demand
485 487 */
486 488 mutex_enter(&logmap->mtm_mutex);
487 489 if ((ul->un_flags & LDL_ERROR) || (logmap->mtm_flags & MTM_ROLL_EXIT)) {
488 490 kmem_free(rbs, nmblk * sizeof (rollbuf_t));
489 491 kmem_free(roll_bufs, nmblk * MAPBLOCKSIZE);
490 492 logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_RUNNING |
491 493 MTM_ROLL_EXIT | MTM_ROLLING);
492 494 cv_broadcast(&logmap->mtm_from_roll_cv);
493 495 CALLB_CPR_EXIT(&cprinfo);
494 496 thread_exit();
495 497 /* NOTREACHED */
496 498 }
497 499
498 500 /*
499 501 * MT_SCAN debug mode
500 502 * don't roll except in FORCEROLL situations
501 503 */
502 504 if (logmap->mtm_debug & MT_SCAN)
503 505 if ((logmap->mtm_flags & MTM_FORCE_ROLL) == 0) {
504 506 mutex_exit(&logmap->mtm_mutex);
505 507 trans_roll_wait(logmap, &cprinfo);
506 508 goto again;
507 509 }
508 510 ASSERT(logmap->mtm_trimlof == 0);
509 511
510 512 /*
511 513 * If we've finished a force roll cycle then wakeup any
512 514 * waiters.
513 515 */
514 516 if (doingforceroll) {
515 517 doingforceroll = 0;
516 518 logmap->mtm_flags &= ~MTM_FORCE_ROLL;
517 519 mutex_exit(&logmap->mtm_mutex);
518 520 cv_broadcast(&logmap->mtm_from_roll_cv);
519 521 } else {
520 522 mutex_exit(&logmap->mtm_mutex);
521 523 }
522 524
523 525 /*
524 526 * If someone wants us to roll something; then do it
525 527 */
526 528 if (logmap->mtm_flags & MTM_FORCE_ROLL) {
527 529 doingforceroll = 1;
528 530 goto rollsomething;
529 531 }
530 532
531 533 /*
532 534 * Log is busy, check if logmap is getting full.
533 535 */
534 536 if (logmap_need_roll(logmap)) {
535 537 goto rollsomething;
536 538 }
537 539
538 540 /*
539 541 * Check if the log is idle and is not empty
540 542 */
541 543 if (!logmap->mtm_ref && !ldl_empty(ul)) {
542 544 goto rollsomething;
543 545 }
544 546
545 547 /*
546 548 * Log is busy, check if its getting full
547 549 */
548 550 if (ldl_need_roll(ul)) {
549 551 goto rollsomething;
550 552 }
551 553
552 554 /*
553 555 * nothing to do; wait a bit and then start over
554 556 */
555 557 trans_roll_wait(logmap, &cprinfo);
556 558 goto again;
557 559
558 560 /*
559 561 * ROLL SOMETHING
560 562 */
561 563
562 564 rollsomething:
563 565 /*
564 566 * Use the cached roll buffers, or read the master
565 567 * and overlay the deltas
566 568 */
567 569 switch (log_roll_read(ul, rbs, nmblk, roll_bufs, &nbuf)) {
568 570 case 1: trans_roll_wait(logmap, &cprinfo);
569 571 /* FALLTHROUGH */
570 572 case 2: goto again;
571 573 /* default case is success */
572 574 }
573 575
574 576 /*
575 577 * Asynchronously write out the deltas
576 578 */
577 579 if (log_roll_write(ul, rbs, nbuf))
578 580 goto again;
579 581
580 582 /*
581 583 * free up the deltas in the logmap
582 584 */
583 585 for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
584 586 bp = &rbp->rb_bh;
585 587 logmap_remove_roll(logmap,
586 588 ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK, MAPBLOCKSIZE);
587 589 }
588 590
589 591 /*
590 592 * free up log space; if possible
591 593 */
592 594 logmap_sethead(logmap, ul);
593 595
594 596 /*
595 597 * LOOP
596 598 */
597 599 goto again;
598 600 }
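The least obvious piece of this file is the sector-map walk in log_roll_write_bufs() above: each rollbuf_t carries a 16-bit rbsecmap_t, one bit per 512-byte sector of the 8K MAPBLOCK, and the loop turns that mask into one write per contiguous run of dirty sectors, chaining an extra buf_t (via b_list) for every run after the first so all the writes can complete asynchronously. A stand-alone, hypothetical user-land sketch of the same decode, for illustration only:

#include <stdio.h>
#include <stdint.h>

/*
 * Hypothetical illustration, not part of the webrev: decode a 16-bit
 * per-MAPBLOCK sector map into contiguous (start, length) runs, the
 * same walk log_roll_write_bufs() uses to issue one write per run.
 */
int
main(void)
{
	uint16_t secmap = 0x0f3a;	/* sectors 1, 3-5 and 8-11 are dirty */
	int pos = 0;

	while (secmap != 0) {
		int start, len = 0;

		/* skip clear bits to find the start of the next run */
		while ((secmap & UINT16_C(1)) == 0) {
			secmap >>= 1;
			pos++;
		}
		start = pos;

		/* count the contiguous dirty sectors in this run */
		while (pos < 16 && (secmap & UINT16_C(1))) {
			secmap >>= 1;
			pos++;
			len++;
		}
		(void) printf("write sectors %d..%d (%d x 512 bytes)\n",
		    start, start + len - 1, len);
	}
	return (0);
}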