1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 #include <sys/systm.h>
30 #include <sys/types.h>
31 #include <sys/vnode.h>
32 #include <sys/errno.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/proc.h>
38 #include <sys/cmn_err.h>
39 #include <sys/fssnap_if.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/fs/ufs_filio.h>
42 #include <sys/fs/ufs_log.h>
43 #include <sys/fs/ufs_bio.h>
44 #include <sys/atomic.h>
45 #include <sys/sunddi.h>
46
47 extern uint_t bypass_snapshot_throttle_key;
48
49 extern struct kmem_cache *lufs_sv;
50 extern struct kmem_cache *lufs_bp;
51
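/*
 * Wait for exclusive access to a log buf.  If the buf completed
 * with an error, put the log into the error state.
 */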
52 static void
53 makebusy(ml_unit_t *ul, buf_t *bp)
54 {
55 sema_p(&bp->b_sem);
56 if ((bp->b_flags & B_ERROR) == 0)
57 return;
58 if (bp->b_flags & B_READ)
59 ldl_seterror(ul, "Error reading ufs log");
60 else
61 ldl_seterror(ul, "Error writing ufs log");
62 }
63
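/*
 * iodone routine for log bufs.  An async write simply releases the
 * buf; a read wakes the thread waiting on it in trans_wait().
 */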
64 static int
65 logdone(buf_t *bp)
66 {
67 bp->b_flags |= B_DONE;
68
69 if (bp->b_flags & B_WRITE)
70 sema_v(&bp->b_sem);
71 else
72 /* wakeup the thread waiting on this buf */
73 sema_v(&bp->b_io);
74 return (0);
75 }
76
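/*
 * iodone routine for the cloned bufs issued by ldl_strategy().
 * Each clone frees its per-clone state; the last one to complete
 * propagates any error and biodone()s the original buf.
 */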
77 static int
78 ldl_strategy_done(buf_t *cb)
79 {
80 lufs_save_t *sv;
81 lufs_buf_t *lbp;
82 buf_t *bp;
83
84 ASSERT(SEMA_HELD(&cb->b_sem));
85 ASSERT((cb->b_flags & B_DONE) == 0);
86
87 /*
88 * Compute address of the ``save'' struct
89 */
90 lbp = (lufs_buf_t *)cb;
91 sv = (lufs_save_t *)lbp->lb_ptr;
92
93 if (cb->b_flags & B_ERROR)
94 sv->sv_error = 1;
95
96 /*
97 * If this is the last request, release the resources and
98 * ``done'' the original buffer header.
99 */
100 if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
101 kmem_cache_free(lufs_bp, lbp);
102 return (1);
103 }
104 /* Propagate any errors back to the original buffer header */
105 bp = sv->sv_bp;
106 if (sv->sv_error)
107 bp->b_flags |= B_ERROR;
108 kmem_cache_free(lufs_bp, lbp);
109 kmem_cache_free(lufs_sv, sv);
110
111 biodone(bp);
112 return (0);
113 }
114
115 /*
116 * Map the log logical block number to a physical disk block number
117 */
118 static int
119 map_frag(
120 ml_unit_t *ul,
121 daddr_t lblkno,
122 size_t bcount,
123 daddr_t *pblkno,
124 size_t *pbcount)
125 {
126 ic_extent_t *ext = ul->un_ebp->ic_extents;
127 uint32_t e = ul->un_ebp->ic_nextents;
128 uint32_t s = 0;
129 uint32_t i = e >> 1;
130 uint32_t lasti = i;
131 uint32_t bno_off;
132
133 again:
134 if (ext[i].ic_lbno <= lblkno) {
135 if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
136 /* FOUND IT */
137 bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
138 *pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
139 *pblkno = ext[i].ic_pbno + bno_off;
140 return (0);
141 } else
142 s = i;
143 } else
144 e = i;
145 i = s + ((e - s) >> 1);
146
147 if (i == lasti) {
148 *pbcount = bcount;
149 return (ENOENT);
150 }
151 lasti = i;
152
153 goto again;
154 }
155
156 /*
157 * The log is a set of extents (typically just one, but possibly
158 * more if the disk was close to full when the log was created),
159 * so the logical offsets into the log
160 * have to be translated into their real device locations before
161 * calling the device's strategy routine. The translation may result
162 * in several IO requests if this request spans extents.
163 */
164 void
165 ldl_strategy(ml_unit_t *ul, buf_t *pb)
166 {
167 lufs_save_t *sv;
168 lufs_buf_t *lbp;
169 buf_t *cb;
170 ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
171 daddr_t lblkno, pblkno;
172 size_t nb_left, pbcount;
173 off_t offset;
174 dev_t dev = ul->un_dev;
175 int error;
176 int read = pb->b_flags & B_READ;
177
178 /*
179 * Allocate and initialise the save structure.
180 */
181 sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
182 sv->sv_error = 0;
183 sv->sv_bp = pb;
184 nb_left = pb->b_bcount;
185 sv->sv_nb_left = nb_left;
186
187 lblkno = pb->b_blkno;
188 offset = 0;
189
190 do {
191 error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);
192
193 lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
194 bioinit(&lbp->lb_buf);
195 lbp->lb_ptr = sv;
196
197 cb = bioclone(pb, offset, pbcount, dev,
198 pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);
199
200 offset += pbcount;
201 lblkno += btodb(pbcount);
202 nb_left -= pbcount;
203
204 if (error) {
205 cb->b_flags |= B_ERROR;
206 cb->b_resid = cb->b_bcount;
207 biodone(cb);
208 } else {
209 if (read) {
210 logstats.ls_ldlreads.value.ui64++;
211 ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
212 lwp_stat_update(LWP_STAT_INBLK, 1);
213 } else {
214 logstats.ls_ldlwrites.value.ui64++;
215 lwp_stat_update(LWP_STAT_OUBLK, 1);
216 }
217
218 /*
219 * write through the snapshot driver if necessary
220 * We do not want this write to be throttled because
221 * we are holding the un_log mutex here. If we
222 * are throttled in fssnap_translate, the fssnap_taskq
223 * thread which can wake us up can get blocked on
224 * the un_log mutex resulting in a deadlock.
225 */
226 if (ufsvfsp->vfs_snapshot) {
227 (void) tsd_set(bypass_snapshot_throttle_key,
228 (void *)1);
229 fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);
230
231 (void) tsd_set(bypass_snapshot_throttle_key,
232 (void *)0);
233 } else {
234 (void) bdev_strategy(cb);
235 }
236 }
237
238 } while (nb_left);
239 }
240
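/*
 * Asynchronously write a log buf to the log device.  In the hard
 * error state the write fails immediately with EIO.
 */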
241 static void
242 writelog(ml_unit_t *ul, buf_t *bp)
243 {
244 ASSERT(SEMA_HELD(&bp->b_sem));
245
246 /*
247 * This is really a B_ASYNC write but we want Presto to
248 * cache this write. The iodone routine, logdone, processes
249 * the buf correctly.
250 */
251 bp->b_flags = B_WRITE;
252 bp->b_edev = ul->un_dev;
253 bp->b_iodone = logdone;
254
255 /*
256 * return EIO for every IO if in hard error state
257 */
258 if (ul->un_flags & LDL_ERROR) {
259 bp->b_flags |= B_ERROR;
260 bp->b_error = EIO;
261 biodone(bp);
262 return;
263 }
264
265 ldl_strategy(ul, bp);
266 }
267
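/*
 * Synchronously read a log buf from the log device; a failed read
 * puts the log into the error state.
 */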
268 static void
269 readlog(ml_unit_t *ul, buf_t *bp)
270 {
271 ASSERT(SEMA_HELD(&bp->b_sem));
272 ASSERT(bp->b_bcount);
273
274 bp->b_flags = B_READ;
275 bp->b_edev = ul->un_dev;
276 bp->b_iodone = logdone;
277
278 /* all IO returns errors when in error state */
279 if (ul->un_flags & LDL_ERROR) {
280 bp->b_flags |= B_ERROR;
281 bp->b_error = EIO;
282 biodone(bp);
283 (void) trans_wait(bp);
284 return;
285 }
286
287 ldl_strategy(ul, bp);
288
289 if (trans_wait(bp))
290 ldl_seterror(ul, "Error reading ufs log");
291 }
292
293 /*
294 * NOTE: writers are single threaded thru the log layer.
295 * This means we can safely reference and change the cb and bp fields
296 * that ldl_read does not reference w/o holding the cb_rwlock or
297 * the bp makebusy lock.
298 */
299 static void
300 push_dirty_bp(ml_unit_t *ul, buf_t *bp)
301 {
302 buf_t *newbp;
303 cirbuf_t *cb = &ul->un_wrbuf;
304
305 ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
306 ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);
307
308 /*
309 * async write the buf
310 */
311 writelog(ul, bp);
312
313 /*
314 * no longer filling any buf
315 */
316 cb->cb_dirty = NULL;
317
318 /*
319 * no extra buffer space; all done
320 */
321 if (bp->b_bcount == bp->b_bufsize)
322 return;
323
324 /*
325 * give extra buffer space to a new bp
326 * try to take buf off of free list
327 */
328 if ((newbp = cb->cb_free) != NULL) {
329 cb->cb_free = newbp->b_forw;
330 } else {
331 newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
332 sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
333 sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
334 }
335 newbp->b_flags = 0;
336 newbp->b_bcount = 0;
337 newbp->b_file = NULL;
338 newbp->b_offset = -1;
339 newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
340 newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
341 bp->b_bufsize = bp->b_bcount;
342
343 /*
344 * lock out readers and put new buf at LRU position
345 */
346 rw_enter(&cb->cb_rwlock, RW_WRITER);
347 newbp->b_forw = bp->b_forw;
348 newbp->b_back = bp;
349 bp->b_forw->b_back = newbp;
350 bp->b_forw = newbp;
351 rw_exit(&cb->cb_rwlock);
352 }
353
354 static void
355 inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
356 {
357 buf_t *bp;
358 off_t elof = lof + nb;
359 off_t buflof;
360 off_t bufelof;
361
362 /*
363 * discard all bufs that overlap the range (lof, lof + nb)
364 */
365 rw_enter(&cb->cb_rwlock, RW_WRITER);
366 bp = cb->cb_bp;
367 do {
368 if (bp == cb->cb_dirty || bp->b_bcount == 0) {
369 bp = bp->b_forw;
370 continue;
371 }
372 buflof = dbtob(bp->b_blkno);
373 bufelof = buflof + bp->b_bcount;
374 if ((buflof < lof && bufelof <= lof) ||
375 (buflof >= elof && bufelof > elof)) {
376 bp = bp->b_forw;
377 continue;
378 }
379 makebusy(ul, bp);
380 bp->b_flags = 0;
381 bp->b_bcount = 0;
382 sema_v(&bp->b_sem);
383 bp = bp->b_forw;
384 } while (bp != cb->cb_bp);
385 rw_exit(&cb->cb_rwlock);
386 }
387
388 /*
389 * NOTE: writers are single threaded thru the log layer.
390 * This means we can safely reference and change the cb and bp fields
391 * that ldl_read does not reference w/o holding the cb_rwlock or
392 * the bp makebusy lock.
393 */
394 static buf_t *
395 get_write_bp(ml_unit_t *ul)
396 {
397 cirbuf_t *cb = &ul->un_wrbuf;
398 buf_t *bp;
399
400 /*
401 * cb_dirty is the buffer we are currently filling, if any
402 */
403 if ((bp = cb->cb_dirty) != NULL) {
404 makebusy(ul, bp);
405 return (bp);
406 }
407 /*
408 * discard any bp that overlaps the current tail since we are
409 * about to overwrite it.
410 */
411 inval_range(ul, cb, ul->un_tail_lof, 1);
412
413 /*
414 * steal LRU buf
415 */
416 rw_enter(&cb->cb_rwlock, RW_WRITER);
417 bp = cb->cb_bp->b_forw;
418 makebusy(ul, bp);
419
420 cb->cb_dirty = bp;
421 cb->cb_bp = bp;
422
423 bp->b_flags = 0;
424 bp->b_bcount = 0;
425 bp->b_blkno = btodb(ul->un_tail_lof);
426 ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
427 rw_exit(&cb->cb_rwlock);
428
429 /*
430 * NOTE:
431 * 1. un_tail_lof never addresses >= un_eol_lof
432 * 2. b_blkno + btodb(b_bufsize) may be > un_eol_lof
433 * this case is handled in storebuf
434 */
435 return (bp);
436 }
437
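/*
 * Allocate the circular write buffer.  Three spare buf headers are
 * placed on the free list and a single buf initially claims the
 * entire bufsize bytes of buffer space.
 */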
438 void
439 alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
440 {
441 int i;
442 buf_t *bp;
443
444 /*
445 * Clear previous allocation
446 */
447 if (cb->cb_nb)
448 free_cirbuf(cb);
449
450 bzero(cb, sizeof (*cb));
451 rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);
452
453 rw_enter(&cb->cb_rwlock, RW_WRITER);
454
455 /*
456 * preallocate 3 bp's and put them on the free list.
457 */
458 for (i = 0; i < 3; ++i) {
459 bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
460 sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
461 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
462 bp->b_offset = -1;
463 bp->b_forw = cb->cb_free;
464 cb->cb_free = bp;
465 }
466
467 cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
468 cb->cb_nb = bufsize;
469
470 /*
471 * first bp claims entire write buffer
472 */
473 bp = cb->cb_free;
474 cb->cb_free = bp->b_forw;
475
476 bp->b_forw = bp;
477 bp->b_back = bp;
478 cb->cb_bp = bp;
479 bp->b_un.b_addr = cb->cb_va;
480 bp->b_bufsize = cb->cb_nb;
481
482 rw_exit(&cb->cb_rwlock);
483 }
484
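/*
 * Allocate the circular read buffer.  The bufsize bytes of buffer
 * space are carved into a ring of bufs, each at most blksize bytes.
 */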
485 void
486 alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
487 {
488 caddr_t va;
489 size_t nb;
490 buf_t *bp;
491
492 /*
493 * Clear previous allocation
494 */
495 if (cb->cb_nb)
496 free_cirbuf(cb);
497
498 bzero(cb, sizeof (*cb));
499 rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);
500
501 rw_enter(&cb->cb_rwlock, RW_WRITER);
502
503 cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
504 cb->cb_nb = bufsize;
505
506 /*
507 * preallocate N bufs that are hard-sized to blksize
508 * in other words, the read buffer pool is a linked list
509 * of statically sized bufs.
510 */
511 va = cb->cb_va;
512 while ((nb = bufsize) != 0) {
513 if (nb > blksize)
514 nb = blksize;
515 bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
516 bzero(bp, sizeof (buf_t));
517 sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
518 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
519 bp->b_un.b_addr = va;
520 bp->b_bufsize = nb;
521 if (cb->cb_bp) {
522 bp->b_forw = cb->cb_bp->b_forw;
523 bp->b_back = cb->cb_bp;
524 cb->cb_bp->b_forw->b_back = bp;
525 cb->cb_bp->b_forw = bp;
526 } else
527 bp->b_forw = bp->b_back = bp;
528 cb->cb_bp = bp;
529 bufsize -= nb;
530 va += nb;
531 }
532
533 rw_exit(&cb->cb_rwlock);
534 }
535
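/*
 * Free a circular buffer: the active and free buf headers, the
 * buffer space, and the rwlock.
 */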
536 void
537 free_cirbuf(cirbuf_t *cb)
538 {
539 buf_t *bp;
540
541 if (cb->cb_nb == 0)
542 return;
543
544 rw_enter(&cb->cb_rwlock, RW_WRITER);
545 ASSERT(cb->cb_dirty == NULL);
546
547 /*
548 * free the active bufs
549 */
550 while ((bp = cb->cb_bp) != NULL) {
551 if (bp == bp->b_forw)
552 cb->cb_bp = NULL;
553 else
554 cb->cb_bp = bp->b_forw;
555 bp->b_back->b_forw = bp->b_forw;
556 bp->b_forw->b_back = bp->b_back;
557 sema_destroy(&bp->b_sem);
558 sema_destroy(&bp->b_io);
559 kmem_free(bp, sizeof (buf_t));
560 }
561
562 /*
563 * free the free bufs
564 */
565 while ((bp = cb->cb_free) != NULL) {
566 cb->cb_free = bp->b_forw;
567 sema_destroy(&bp->b_sem);
568 sema_destroy(&bp->b_io);
569 kmem_free(bp, sizeof (buf_t));
570 }
571 kmem_free(cb->cb_va, cb->cb_nb);
572 cb->cb_va = NULL;
573 cb->cb_nb = 0;
574 rw_exit(&cb->cb_rwlock);
575 rw_destroy(&cb->cb_rwlock);
576 }
577
578 static int
579 within_range(off_t lof, daddr_t blkno, ulong_t bcount)
580 {
581 off_t blof = dbtob(blkno);
582
583 return ((lof >= blof) && (lof < (blof + bcount)));
584 }
585
586 static buf_t *
587 find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
588 {
589 buf_t *bp;
590
591 /*
592 * find a buf that contains the offset lof
593 */
594 rw_enter(&cb->cb_rwlock, RW_READER);
595 bp = cb->cb_bp;
596 do {
597 if (bp->b_bcount &&
598 within_range(lof, bp->b_blkno, bp->b_bcount)) {
599 makebusy(ul, bp);
600 rw_exit(&cb->cb_rwlock);
601 return (bp);
602 }
603 bp = bp->b_forw;
604 } while (bp != cb->cb_bp);
605 rw_exit(&cb->cb_rwlock);
606
607 return (NULL);
608 }
609
610 static off_t
611 find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
612 {
613 buf_t *bp, *bpend;
614 off_t rlof;
615
616 /*
617 * we mustn't:
618 * o read past eol
619 * o read past the tail
620 * o read data that may be being written.
621 */
622 rw_enter(&cb->cb_rwlock, RW_READER);
623 bpend = bp = cb->cb_bp->b_forw;
624 rlof = ul->un_tail_lof;
625 do {
626 if (bp->b_bcount) {
627 rlof = dbtob(bp->b_blkno);
628 break;
629 }
630 bp = bp->b_forw;
631 } while (bp != bpend);
632 rw_exit(&cb->cb_rwlock);
633
634 if (lof <= rlof)
635 /* lof is prior to the range represented by the write buf */
636 return (rlof);
637 else
638 /* lof follows the range represented by the write buf */
639 return ((off_t)ul->un_eol_lof);
640 }
641
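/*
 * Return a busy buf containing the log data at offset lof.  The
 * request is satisfied from the incore write or read buffers when
 * possible; otherwise the LRU read buf is recycled and filled from
 * the log device.
 */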
642 static buf_t *
643 get_read_bp(ml_unit_t *ul, off_t lof)
644 {
645 cirbuf_t *cb;
646 buf_t *bp;
647 off_t rlof;
648
649 /*
650 * retrieve as much data as possible from the incore buffers
651 */
652 if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
653 logstats.ls_lreadsinmem.value.ui64++;
654 return (bp);
655 }
656 if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
657 logstats.ls_lreadsinmem.value.ui64++;
658 return (bp);
659 }
660
661 /*
662 * steal the LRU buf
663 */
664 cb = &ul->un_rdbuf;
665 rw_enter(&cb->cb_rwlock, RW_WRITER);
666 bp = cb->cb_bp->b_forw;
667 makebusy(ul, bp);
668 bp->b_flags = 0;
669 bp->b_bcount = 0;
670 cb->cb_bp = bp;
671 rw_exit(&cb->cb_rwlock);
672
673 /*
674 * don't read past the tail or the end-of-log
675 */
676 bp->b_blkno = btodb(lof);
677 lof = dbtob(bp->b_blkno);
678 rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
679 bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
680 readlog(ul, bp);
681 return (bp);
682 }
683
684 /*
685 * NOTE: writers are single threaded thru the log layer.
686 * This means we can safely reference and change the cb and bp fields
687 * that ldl_read does not reference w/o holding the cb_rwlock or
688 * the bp makebusy lock.
689 */
690 static int
691 extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
692 {
693 buf_t *bpforw = bp->b_forw;
694
695 ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
696
697 /*
698 * there is no `next' bp; do nothing
699 */
700 if (bpforw == bp)
701 return (0);
702
703 /*
704 * buffer space is not adjacent; do nothing
705 */
706 if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
707 return (0);
708
709 /*
710 * locking protocol requires giving up any bp locks before
711 * acquiring cb_rwlock. This is okay because we hold
712 * un_log_mutex.
713 */
714 sema_v(&bp->b_sem);
715
716 /*
717 * lock out ldl_read
718 */
719 rw_enter(&cb->cb_rwlock, RW_WRITER);
720
721 /*
722 * wait for current IO to finish w/next bp, if necessary
723 */
724 makebusy(ul, bpforw);
725
726 /*
727 * free the next bp and steal its space
728 */
729 bp->b_forw = bpforw->b_forw;
730 bpforw->b_forw->b_back = bp;
731 bp->b_bufsize += bpforw->b_bufsize;
732 sema_v(&bpforw->b_sem);
733 bpforw->b_forw = cb->cb_free;
734 cb->cb_free = bpforw;
735 makebusy(ul, bp);
736 rw_exit(&cb->cb_rwlock);
737
738 return (1);
739 }
740
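/*
 * Copy up to nb bytes from va into the write buf, filling in a
 * sector trailer at the end of each sector.  The buf is pushed out
 * when the log wraps or when it fills and cannot be extended.
 * Returns the number of bytes consumed.
 */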
741 static size_t
742 storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
743 {
744 size_t copy_nb;
745 size_t nb_in_sec;
746 sect_trailer_t *st;
747 size_t nb_left = nb;
748 cirbuf_t *cb = &ul->un_wrbuf;
749
750 again:
751 nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
752 copy_nb = MIN(nb_left, nb_in_sec);
753
754 ASSERT(copy_nb);
755
756 bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
757 bp->b_bcount += copy_nb;
758 va += copy_nb;
759 nb_left -= copy_nb;
760 ul->un_tail_lof += copy_nb;
761
762 if ((nb_in_sec -= copy_nb) == 0) {
763 st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);
764
765 st->st_tid = ul->un_logmap->mtm_tid;
766 st->st_ident = ul->un_tail_ident++;
767 bp->b_bcount += sizeof (sect_trailer_t);
768 ul->un_tail_lof += sizeof (sect_trailer_t);
769 /*
770 * log wrapped; async write this bp
771 */
772 if (ul->un_tail_lof == ul->un_eol_lof) {
773 ul->un_tail_lof = ul->un_bol_lof;
774 push_dirty_bp(ul, bp);
775 return (nb - nb_left);
776 }
777 /*
778 * out of bp space; get more or async write buf
779 */
780 if (bp->b_bcount == bp->b_bufsize) {
781 if (!extend_write_bp(ul, cb, bp)) {
782 push_dirty_bp(ul, bp);
783 return (nb - nb_left);
784 }
785 }
786 }
787 if (nb_left)
788 goto again;
789
790 sema_v(&bp->b_sem);
791 return (nb);
792 }
793
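/*
 * Zero the portion of the caller's buffer covered by an all-zeroes
 * (DT_ABZERO) delta; such deltas carry no data in the log.
 */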
794 static void
795 fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
796 {
797 offset_t src_mof = me->me_mof;
798 size_t src_nb = me->me_nb;
799
800 if (src_mof > dst_mof) {
801 ASSERT(src_mof < (dst_mof + dst_nb));
802 dst_va += (src_mof - dst_mof);
803 dst_nb -= (src_mof - dst_mof);
804 } else {
805 ASSERT(dst_mof < (src_mof + src_nb));
806 src_nb -= (dst_mof - src_mof);
807 }
808
809 src_nb = MIN(src_nb, dst_nb);
810 ASSERT(src_nb);
811 bzero(dst_va, src_nb);
812 }
813
814 /*
815 * dst_va == NULL means don't copy anything
816 */
817 static ulong_t
818 fetchbuf(
819 ml_unit_t *ul,
820 buf_t *bp,
821 caddr_t dst_va,
822 size_t dst_nb,
823 off_t *dst_lofp)
824 {
825 caddr_t copy_va;
826 size_t copy_nb;
827 size_t nb_sec;
828 off_t dst_lof = *dst_lofp;
829 ulong_t sav_dst_nb = dst_nb;
830 ulong_t src_nb = bp->b_bcount;
831 off_t src_lof = dbtob(bp->b_blkno);
832 off_t src_elof = src_lof + src_nb;
833 caddr_t src_va = bp->b_un.b_addr;
834
835 /*
836 * copy from bp to dst_va
837 */
838 while (dst_nb) {
839 /*
840 * compute address within bp
841 */
842 copy_va = src_va + (dst_lof - src_lof);
843
844 /*
845 * adjust copy size to amount of data in bp
846 */
847 copy_nb = MIN(dst_nb, src_elof - dst_lof);
848
849 /*
850 * adjust copy size to amount of data in sector
851 */
852 nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
853 copy_nb = MIN(copy_nb, nb_sec);
854
855 /*
856 * dst_va == NULL means don't do copy (see logseek())
857 */
858 if (dst_va) {
859 bcopy(copy_va, dst_va, copy_nb);
860 dst_va += copy_nb;
861 }
862 dst_lof += copy_nb;
863 dst_nb -= copy_nb;
864 nb_sec -= copy_nb;
865
866 /*
867 * advance over sector trailer
868 */
869 if (nb_sec == 0)
870 dst_lof += sizeof (sect_trailer_t);
871
872 /*
873 * exhausted buffer
874 * return current lof for next read
875 */
876 if (dst_lof == src_elof) {
877 sema_v(&bp->b_sem);
878 if (dst_lof == ul->un_eol_lof)
879 dst_lof = ul->un_bol_lof;
880 *dst_lofp = dst_lof;
881 return (sav_dst_nb - dst_nb);
882 }
883 }
884
885 /*
886 * copy complete - return current lof
887 */
888 sema_v(&bp->b_sem);
889 *dst_lofp = dst_lof;
890 return (sav_dst_nb);
891 }
892
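/*
 * Round the dirty write buf up to the next sector boundary so the
 * commit's sector trailer can be filled in, then advance the tail.
 * The buf is pushed out if the log wrapped or the buf is now full.
 */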
893 void
894 ldl_round_commit(ml_unit_t *ul)
895 {
896 int wrapped;
897 buf_t *bp;
898 sect_trailer_t *st;
899 size_t bcount;
900 cirbuf_t *cb = &ul->un_wrbuf;
901
902 /*
903 * if nothing to write, then do nothing
904 */
905 if ((bp = cb->cb_dirty) == NULL)
906 return;
907 makebusy(ul, bp);
908
909 /*
910 * round up to sector boundary and set new tail
911 * don't readjust st_ident if buf is already rounded
912 */
913 bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
914 if (bcount == bp->b_bcount) {
915 sema_v(&bp->b_sem);
916 return;
917 }
918 bp->b_bcount = bcount;
919 ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
920 wrapped = 0;
921 if (ul->un_tail_lof == ul->un_eol_lof) {
922 ul->un_tail_lof = ul->un_bol_lof;
923 ++wrapped;
924 }
925 ASSERT(ul->un_tail_lof != ul->un_head_lof);
926
927 /*
928 * fix up the sector trailer
929 */
930 /* LINTED */
931 st = (sect_trailer_t *)
932 ((bp->b_un.b_addr + bcount) - sizeof (*st));
933 st->st_tid = ul->un_logmap->mtm_tid;
934 st->st_ident = ul->un_tail_ident++;
935
936 /*
937 * if tail wrapped or we have exhausted this buffer
938 * async write the buffer
939 */
940 if (wrapped || bcount == bp->b_bufsize)
941 push_dirty_bp(ul, bp);
942 else
943 sema_v(&bp->b_sem);
944 }
945
946 void
947 ldl_push_commit(ml_unit_t *ul)
948 {
949 buf_t *bp;
950 cirbuf_t *cb = &ul->un_wrbuf;
951
952 /*
953 * if nothing to write, then do nothing
954 */
955 if ((bp = cb->cb_dirty) == NULL)
956 return;
957 makebusy(ul, bp);
958 push_dirty_bp(ul, bp);
959 }
960
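/*
 * Returns true when more than 75% of the maximum log reservation
 * is in use.
 */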
961 int
962 ldl_need_commit(ml_unit_t *ul)
963 {
964 return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
965 }
966
967 int
968 ldl_has_space(ml_unit_t *ul, mapentry_t *me)
969 {
970 off_t nfb;
971 off_t nb;
972
973 ASSERT(MUTEX_HELD(&ul->un_log_mutex));
974
975 /*
976 * Add up the size used by the deltas
977 * round nb up to a sector length plus an extra sector
978 * w/o the extra sector we couldn't distinguish
979 * a full log (head == tail) from an empty log (head == tail)
980 */
981 for (nb = DEV_BSIZE; me; me = me->me_hash) {
982 nb += sizeof (struct delta);
983 if (me->me_dt != DT_CANCEL)
984 nb += me->me_nb;
985 }
986 nb = P2ROUNDUP(nb, DEV_BSIZE);
987
988 if (ul->un_head_lof <= ul->un_tail_lof)
989 nfb = (ul->un_head_lof - ul->un_bol_lof) +
990 (ul->un_eol_lof - ul->un_tail_lof);
991 else
992 nfb = ul->un_head_lof - ul->un_tail_lof;
993
994 return (nb < nfb);
995 }
996
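/*
 * Write a delta header and, unless the delta is a commit, cancel,
 * or all-zeroes delta, its data into the log.  me_lof records where
 * the data lands (or, for the dataless deltas, where the next delta
 * will go) so the roll thread can find it later.
 */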
997 void
998 ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
999 {
1000 buf_t *bp;
1001 caddr_t va;
1002 size_t nb;
1003 size_t actual;
1004
1005 ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1006
1007 /* Write the delta */
1008
1009 nb = sizeof (struct delta);
1010 va = (caddr_t)&me->me_delta;
1011 bp = get_write_bp(ul);
1012
1013 while (nb) {
1014 if (ul->un_flags & LDL_ERROR) {
1015 sema_v(&bp->b_sem);
1016 return;
1017 }
1018 actual = storebuf(ul, bp, va, nb);
1019 ASSERT(actual);
1020 va += actual;
1021 nb -= actual;
1022 if (nb)
1023 bp = get_write_bp(ul);
1024 }
1025
1026 /* If a commit, cancel, or 0's; we're almost done */
1027 switch (me->me_dt) {
1028 case DT_COMMIT:
1029 case DT_CANCEL:
1030 case DT_ABZERO:
1031 /* roll needs to know where the next delta will go */
1032 me->me_lof = ul->un_tail_lof;
1033 return;
1034 default:
1035 break;
1036 }
1037
1038 /* Now write the data */
1039
1040 ASSERT(me->me_nb != 0);
1041
1042 nb = me->me_nb;
1043 va = (me->me_mof - bufmof) + bufp;
1044 bp = get_write_bp(ul);
1045
1046 /* Save where we will put the data */
1047 me->me_lof = ul->un_tail_lof;
1048
1049 while (nb) {
1050 if (ul->un_flags & LDL_ERROR) {
1051 sema_v(&bp->b_sem);
1052 return;
1053 }
1054 actual = storebuf(ul, bp, va, nb);
1055 ASSERT(actual);
1056 va += actual;
1057 nb -= actual;
1058 if (nb)
1059 bp = get_write_bp(ul);
1060 }
1061 }
1062
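/*
 * Wait for all outstanding async log writes to complete.
 */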
1063 void
1064 ldl_waito(ml_unit_t *ul)
1065 {
1066 buf_t *bp;
1067 cirbuf_t *cb = &ul->un_wrbuf;
1068
1069 rw_enter(&cb->cb_rwlock, RW_WRITER);
1070 /*
1071 * wait on them
1072 */
1073 bp = cb->cb_bp;
1074 do {
1075 if ((bp->b_flags & B_DONE) == 0) {
1076 makebusy(ul, bp);
1077 sema_v(&bp->b_sem);
1078 }
1079 bp = bp->b_forw;
1080 } while (bp != cb->cb_bp);
1081 rw_exit(&cb->cb_rwlock);
1082 }
1083
1084 /*
1085 * seek nb bytes from location lof
1086 */
1087 static int
1088 logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
1089 {
1090 buf_t *bp;
1091 ulong_t actual;
1092
1093 while (nb) {
1094 bp = get_read_bp(ul, lof);
1095 if (bp->b_flags & B_ERROR) {
1096 sema_v(&bp->b_sem);
1097 return (EIO);
1098 }
1099 actual = fetchbuf(ul, bp, NULL, nb, &lof);
1100 ASSERT(actual);
1101 nb -= actual;
1102 }
1103 *lofp = lof;
1104 ASSERT(nb == 0);
1105 return (0);
1106 }
1107
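/*
 * Copy log data for a list of map entries into the caller's buffer.
 * Each entry is satisfied from its cached roll buffer, from zeroes
 * (DT_ABZERO), or by reading the log device.
 */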
1108 int
1109 ldl_read(
1110 ml_unit_t *ul, /* Log unit */
1111 caddr_t va, /* address of buffer to read into */
1112 offset_t mof, /* mof of buffer */
1113 off_t nb, /* length of buffer */
1114 mapentry_t *me) /* Map entry list */
1115 {
1116 buf_t *bp;
1117 crb_t *crb;
1118 caddr_t rva; /* address to read into */
1119 size_t rnb; /* # of bytes to read */
1120 off_t lof; /* log device offset to read from */
1121 off_t skip;
1122 ulong_t actual;
1123 int error;
1124 caddr_t eva = va + nb; /* end of buffer */
1125
1126 for (; me; me = me->me_agenext) {
1127 ASSERT(me->me_dt != DT_CANCEL);
1128
1129 /*
1130 * check for a cached roll buffer
1131 */
1132 crb = me->me_crb;
1133 if (crb) {
1134 if (mof > crb->c_mof) {
1135 /*
1136 * This mapentry overlaps with the beginning of
1137 * the supplied buffer
1138 */
1139 skip = mof - crb->c_mof;
1140 bcopy(crb->c_buf + skip, va,
1141 MIN(nb, crb->c_nb - skip));
1142 } else {
1143 /*
1144 * This mapentry starts at or after
1145 * the supplied buffer.
1146 */
1147 skip = crb->c_mof - mof;
1148 bcopy(crb->c_buf, va + skip,
1149 MIN(crb->c_nb, nb - skip));
1150 }
1151 logstats.ls_lreadsinmem.value.ui64++;
1152 continue;
1153 }
1154
1155 /*
1156 * check for a delta full of zeroes - there's no log data
1157 */
1158 if (me->me_dt == DT_ABZERO) {
1159 fetchzeroes(va, mof, nb, me);
1160 continue;
1161 }
1162
1163 if (mof > me->me_mof) {
1164 rnb = (size_t)(mof - me->me_mof);
1165 error = logseek(ul, me->me_lof, rnb, &lof);
1166 if (error)
1167 return (EIO);
1168 rva = va;
1169 rnb = me->me_nb - rnb;
1170 rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
1171 } else {
1172 lof = me->me_lof;
1173 rva = (me->me_mof - mof) + va;
1174 rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb;
1175 }
1176
1177 while (rnb) {
1178 bp = get_read_bp(ul, lof);
1179 if (bp->b_flags & B_ERROR) {
1180 sema_v(&bp->b_sem);
1181 return (EIO);
1182 }
1183 ASSERT(((me->me_flags & ME_ROLL) == 0) ||
1184 (bp != ul->un_wrbuf.cb_dirty));
1185 actual = fetchbuf(ul, bp, rva, rnb, &lof);
1186 ASSERT(actual);
1187 rva += actual;
1188 rnb -= actual;
1189 }
1190 }
1191 return (0);
1192 }
1193
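/*
 * Write the incore log state out to both copies of the on-disk
 * state sector, updating the checksum.  A write error puts the log
 * into the error state.
 */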
1194 void
1195 ldl_savestate(ml_unit_t *ul)
1196 {
1197 int error;
1198 buf_t *bp = ul->un_bp;
1199 ml_odunit_t *ud = (void *)bp->b_un.b_addr;
1200 ml_odunit_t *ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);
1201
1202 #if DEBUG
1203 /*
1204 * Scan test is running; don't update intermediate state
1205 */
1206 if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
1207 return;
1208 #endif /* DEBUG */
1209
1210 mutex_enter(&ul->un_state_mutex);
1211 bcopy(&ul->un_ondisk, ud, sizeof (*ud));
1212 ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
1213 bcopy(ud, ud2, sizeof (*ud));
1214
1215 /* If a snapshot is enabled, write through the snapshot driver. */
1216 if (ul->un_ufsvfs->vfs_snapshot)
1217 UFS_BWRITE2(ul->un_ufsvfs, bp);
1218 else
1219 BWRITE2(bp);
1220 logstats.ls_ldlwrites.value.ui64++;
1221 error = bp->b_flags & B_ERROR;
1222 mutex_exit(&ul->un_state_mutex);
1223 if (error)
1224 ldl_seterror(ul, "Error writing ufs log state");
1225 }
1226
1227 /*
1228 * The head will be set to (new_lof - header) since ldl_sethead is
1229 * called with the new_lof of the data portion of a delta.
1230 */
1231 void
1232 ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
1233 {
1234 off_t nb;
1235 off_t new_lof;
1236 uint32_t new_ident;
1237 daddr_t beg_blkno;
1238 daddr_t end_blkno;
1239
1240 ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1241
1242 if (data_lof == -1) {
1243 /* log is empty */
1244 new_ident = lufs_hd_genid(ul);
1245 new_lof = ul->un_tail_lof;
1246
1247 } else {
1248 /* compute header's lof */
1249 new_ident = ul->un_head_ident;
1250 new_lof = data_lof - sizeof (struct delta);
1251
1252 /* whoops, header spans sectors; subtract out sector trailer */
1253 if (btodb(new_lof) != btodb(data_lof))
1254 new_lof -= sizeof (sect_trailer_t);
1255
1256 /* whoops, header wrapped the log; go to last sector */
1257 if (new_lof < ul->un_bol_lof) {
1258 /* sector offset */
1259 new_lof -= dbtob(btodb(new_lof));
1260 /* add to last sector's lof */
1261 new_lof += (ul->un_eol_lof - DEV_BSIZE);
1262 }
1263 ul->un_head_tid = tid;
1264 }
1265
1266 /*
1267 * check for nop
1268 */
1269 if (new_lof == ul->un_head_lof)
1270 return;
1271
1272 /*
1273 * invalidate the affected bufs and calculate new ident
1274 */
1275 if (new_lof > ul->un_head_lof) {
1276 nb = new_lof - ul->un_head_lof;
1277 inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
1278 inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
1279
1280 end_blkno = btodb(new_lof);
1281 beg_blkno = btodb(ul->un_head_lof);
1282 new_ident += (end_blkno - beg_blkno);
1283 } else {
1284 nb = ul->un_eol_lof - ul->un_head_lof;
1285 inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
1286 inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
1287
1288 end_blkno = btodb(ul->un_eol_lof);
1289 beg_blkno = btodb(ul->un_head_lof);
1290 new_ident += (end_blkno - beg_blkno);
1291
1292 nb = new_lof - ul->un_bol_lof;
1293 inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
1294 inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);
1295
1296 end_blkno = btodb(new_lof);
1297 beg_blkno = btodb(ul->un_bol_lof);
1298 new_ident += (end_blkno - beg_blkno);
1299 }
1300 /*
1301 * don't update the head if there has been an error
1302 */
1303 if (ul->un_flags & LDL_ERROR)
1304 return;
1305
1306 /* Fix up the head and ident */
1307 ASSERT(new_lof >= ul->un_bol_lof);
1308 ul->un_head_lof = new_lof;
1309 ul->un_head_ident = new_ident;
1310 if (data_lof == -1) {
1311 ul->un_tail_ident = ul->un_head_ident;
1312 }
1313
1314
1315 /* Commit to the database */
1316 ldl_savestate(ul);
1317
1318 ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
1319 ldl_sethead_debug(ul));
1320 }
1321
1322 /*
1323 * The tail will be set to the sector following lof+nb
1324 * lof + nb == size of the last delta + commit record
1325 * this function is called once after the log scan has completed.
1326 */
1327 void
1328 ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
1329 {
1330 off_t new_lof;
1331 uint32_t new_ident;
1332 daddr_t beg_blkno;
1333 daddr_t end_blkno;
1334
1335 ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1336
1337 if (lof == -1) {
1338 ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
1339 ul->un_head_lof = ul->un_tail_lof;
1340 ul->un_head_ident = lufs_hd_genid(ul);
1341 ul->un_tail_ident = ul->un_head_ident;
1342
1343 /* Commit to the database */
1344 ldl_savestate(ul);
1345
1346 return;
1347 }
1348
1349 /*
1350 * new_lof is the offset of the sector following the last commit
1351 */
1352 (void) logseek(ul, lof, nb, &new_lof);
1353 ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));
1354
1355 /*
1356 * calculate new ident
1357 */
1358 if (new_lof > ul->un_head_lof) {
1359 end_blkno = btodb(new_lof);
1360 beg_blkno = btodb(ul->un_head_lof);
1361 new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
1362 } else {
1363 end_blkno = btodb(ul->un_eol_lof);
1364 beg_blkno = btodb(ul->un_head_lof);
1365 new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
1366
1367 end_blkno = btodb(new_lof);
1368 beg_blkno = btodb(ul->un_bol_lof);
1369 new_ident += (end_blkno - beg_blkno);
1370 }
1371
1372 /* Fix up the tail and ident */
1373 ul->un_tail_lof = new_lof;
1374 ul->un_tail_ident = new_ident;
1375
1376 /* Commit to the database */
1377 ldl_savestate(ul);
1378 }
1379
1380 /*
1381 * LOGSCAN STUFF
1382 */
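/*
 * Verify the sector trailers in bp against the idents expected from
 * the head of the log, truncating the buf at the first mismatch.
 * Returns true if lof still falls within the valid portion of the buf.
 */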
1383 static int
1384 ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
1385 {
1386 ulong_t ident;
1387 size_t nblk, i;
1388 sect_trailer_t *st;
1389
1390 /*
1391 * compute ident for first sector in the buffer
1392 */
1393 ident = ul->un_head_ident;
1394 if (bp->b_blkno >= btodb(ul->un_head_lof)) {
1395 ident += (bp->b_blkno - btodb(ul->un_head_lof));
1396 } else {
1397 ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
1398 ident += (bp->b_blkno - btodb(ul->un_bol_lof));
1399 }
1400 /*
1401 * truncate the buffer down to the last valid sector
1402 */
1403 nblk = btodb(bp->b_bcount);
1404 bp->b_bcount = 0;
1405 /* LINTED */
1406 st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
1407 for (i = 0; i < nblk; ++i) {
1408 if (st->st_ident != ident)
1409 break;
1410
1411 /* remember last valid tid for ldl_logscan_error() */
1412 ul->un_tid = st->st_tid;
1413
1414 /* LINTED */
1415 st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
1416 ++ident;
1417 bp->b_bcount += DEV_BSIZE;
1418 }
1419 /*
1420 * make sure that lof is still within range
1421 */
1422 return (within_range(lof, bp->b_blkno, bp->b_bcount));
1423 }
1424
1425 ulong_t
1426 ldl_logscan_nbcommit(off_t lof)
1427 {
1428 /*
1429 * lof is the offset following the commit header. However,
1430 * if the commit header fell on the end-of-sector, then lof
1431 * has already been advanced to the beginning of the next
1432 * sector. So do nothing. Otherwise, return the remaining
1433 * bytes in the sector.
1434 */
1435 if ((lof & (DEV_BSIZE - 1)) == 0)
1436 return (0);
1437 return (NB_LEFT_IN_SECTOR(lof));
1438 }
1439
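/*
 * Read log data during log scan.  Fails with EIO if the request
 * falls outside the valid region of the log or if a sector trailer
 * ident is out of sequence (a partially written transaction).
 */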
1440 int
1441 ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
1442 {
1443 buf_t *bp;
1444 ulong_t actual;
1445
1446 ASSERT(ul->un_head_lof != ul->un_tail_lof);
1447
1448 /*
1449 * Check the log data doesn't go out of bounds
1450 */
1451 if (ul->un_head_lof < ul->un_tail_lof) {
1452 if (!WITHIN(*lofp, nb, ul->un_head_lof,
1453 (ul->un_tail_lof - ul->un_head_lof))) {
1454 return (EIO);
1455 }
1456 } else {
1457 if (OVERLAP(*lofp, nb, ul->un_tail_lof,
1458 (ul->un_head_lof - ul->un_tail_lof))) {
1459 return (EIO);
1460 }
1461 }
1462
1463 while (nb) {
1464 bp = get_read_bp(ul, *lofp);
1465 if (bp->b_flags & B_ERROR) {
1466 sema_v(&bp->b_sem);
1467 return (EIO);
1468 }
1469 /*
1470 * out-of-seq idents means partial transaction
1471 * panic, non-corrupting powerfail, ...
1472 */
1473 if (!ldl_logscan_ident(ul, bp, *lofp)) {
1474 sema_v(&bp->b_sem);
1475 return (EIO);
1476 }
1477 /*
1478 * copy the header into the caller's buf
1479 */
1480 actual = fetchbuf(ul, bp, va, nb, lofp);
1481 if (va)
1482 va += actual;
1483 nb -= actual;
1484 }
1485 return (0);
1486 }
1487
1488 void
1489 ldl_logscan_begin(ml_unit_t *ul)
1490 {
1491 size_t bufsize;
1492
1493 ASSERT(ul->un_wrbuf.cb_dirty == NULL);
1494
1495 /*
1496 * logscan has begun
1497 */
1498 ul->un_flags |= LDL_SCAN;
1499
1500 /*
1501 * reset the circular bufs
1502 */
1503 bufsize = ldl_bufsize(ul);
1504 alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
1505 alloc_wrbuf(&ul->un_wrbuf, bufsize);
1506
1507 /*
1508 * set the tail to reflect a full log
1509 */
1510 ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;
1511
1512 if (ul->un_tail_lof < ul->un_bol_lof)
1513 ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
1514 if (ul->un_tail_lof >= ul->un_eol_lof)
1515 ul->un_tail_lof = ul->un_bol_lof;
1516
1517 /*
1518 * un_tid is used during error processing; it is initialized to
1519 * the tid of the delta at un_head_lof.
1520 */
1521 ul->un_tid = ul->un_head_tid;
1522 }
1523
1524 void
1525 ldl_logscan_end(ml_unit_t *ul)
1526 {
1527 size_t bufsize;
1528
1529 /*
1530 * reset the circular bufs
1531 */
1532 bufsize = ldl_bufsize(ul);
1533 alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
1534 alloc_wrbuf(&ul->un_wrbuf, bufsize);
1535
1536 /*
1537 * Done w/scan
1538 */
1539 ul->un_flags &= ~LDL_SCAN;
1540 }
1541
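/*
 * Returns true when the log is more than 75% full and should be
 * rolled.
 */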
1542 int
1543 ldl_need_roll(ml_unit_t *ul)
1544 {
1545 off_t busybytes;
1546 off_t head;
1547 off_t tail;
1548 off_t bol;
1549 off_t eol;
1550 off_t nb;
1551
1552 /*
1553 * snapshot the log state
1554 */
1555 head = ul->un_head_lof;
1556 tail = ul->un_tail_lof;
1557 bol = ul->un_bol_lof;
1558 eol = ul->un_eol_lof;
1559 nb = ul->un_logsize;
1560
1561 /*
1562 * compute number of busy (inuse) bytes
1563 */
1564 if (head <= tail)
1565 busybytes = tail - head;
1566 else
1567 busybytes = (eol - head) + (tail - bol);
1568
1569 /*
1570 * return TRUE if > 75% full
1571 */
1572 return (busybytes > (nb - (nb >> 2)));
1573 }
1574
1575 void
1576 ldl_seterror(ml_unit_t *ul, char *why)
1577 {
1578 /*
1579 * already in error state; do nothing
1580 */
1581 if (ul->un_flags & LDL_ERROR)
1582 return;
1583
1584 ul->un_flags |= LDL_ERROR; /* incore */
1585 ul->un_badlog = 1; /* ondisk (cleared by fsck) */
1586
1587 /*
1588 * Commit to state sectors
1589 */
1590 uniqtime(&ul->un_timestamp);
1591 ldl_savestate(ul);
1592
1593 /* Pretty print */
1594 cmn_err(CE_WARN, "%s", why);
1595 cmn_err(CE_WARN, "ufs log for %s changed state to Error",
1596 ul->un_ufsvfs->vfs_fs->fs_fsmnt);
1597 cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)",
1598 ul->un_ufsvfs->vfs_fs->fs_fsmnt);
1599
1600 /*
1601 * If we aren't in the middle of scan (aka snarf), tell ufs
1602 * to hard lock itself.
1603 */
1604 if ((ul->un_flags & LDL_SCAN) == 0)
1605 ufs_trans_onerror();
1606 }
1607
1608 size_t
1609 ldl_bufsize(ml_unit_t *ul)
1610 {
1611 size_t bufsize;
1612 extern uint32_t ldl_minbufsize;
1613
1614 /*
1615 * initial guess is the maxtransfer value for this log device
1616 * increase if too small
1617 * decrease if too large
1618 */
1619 bufsize = dbtob(btod(ul->un_maxtransfer));
1620 if (bufsize < ldl_minbufsize)
1621 bufsize = ldl_minbufsize;
1622 if (bufsize > maxphys)
1623 bufsize = maxphys;
1624 if (bufsize > ul->un_maxtransfer)
1625 bufsize = ul->un_maxtransfer;
1626 return (bufsize);
1627 }