1 #! /usr/bin/ksh -p
2 # CDDL HEADER START
3 #
4 # The contents of this file are subject to the terms of the
5 # Common Development and Distribution License (the "License").
6 # You may not use this file except in compliance with the License.
7 #
8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 # or http://www.opensolaris.org/os/licensing.
10 # See the License for the specific language governing permissions
11 # and limitations under the License.
12 #
13 # When distributing Covered Code, include this CDDL HEADER in each
14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 # If applicable, add the following below this CDDL HEADER, with the
16 # fields enclosed by brackets "[]" replaced with your own identifying
17 # information: Portions Copyright [yyyy] [name of copyright owner]
18 #
19 # CDDL HEADER END
20 # Portions Copyright 2009 Sun Microsystems, Inc.
21 # Portions Copyright 2009 Richard Elling
22 #
23 # File: zilstat.ksh
24 # Author: Richard.Elling@RichardElling.com
25 # Online information:
26 # http://www.RichardElling.com/Home/scripts-and-programs-1/zilstat-intro
27 #
28 # This dtrace program will help identify the ZIL activity by sampling
29 # writes sent to the ZIL.
30 # output:
31 # [TIME]
32 # Bytes - total bytes written to ZIL over the interval
33 # Bytes/S - bytes/s written to ZIL over ther interval
34 # Max-Rate - maximum rate during any 1-second sample
35 # this output is listed for both the size of the data and the
36 # size of the buffer containing the data.
37 # In an attempt to reconcile writes > zfs_immediate_write_sz
38 # or to otherwise make better decisions, the size of the buffer
39 # is also represented in bins: <=4kBytes, 4-32kBytes, and > 32kBytes.
40 # This should help you determine if the workload contains a bunch of
41 # itty-bitty synchronous writes or just a few, large writes. This may
42 # be important because if the pool does not have a log device and the
43 # writes are > 32kBytes (zfs_immediate_write_sz) then they are not written
44 # to the ZIL, but are instead written directly to the pool. OTOH, if there
45 # is a separate log, then the writes are always sent to the log with the
46 # expectation that the log device is always faster (lower latency) than
47 # the pool. However, until a per-pool view is generated, the stats collected
48 # here are for all pools.
49 #
50 # TODO: add per-pool option which also knows about zfs_immediate_write_sz
51 # logic.
52 #
53 ##############################
54 # --- Process Arguments ---
55 #
56
57 ### default variables
58 opt_mega=0
59 opt_pool=0
60 opt_time=0
61 opt_txg=0
62 filter=0
63 pool=
64 lines=-1
65 interval=1
66 count=-1
67
68 ### process options
69 while getopts hl:Mp:t name
70 do
71 case $name in
72 l) lines=$OPTARG ;;
73 M) opt_mega=1 ;;
74 p) opt_pool=1; pool=$OPTARG ;;
75 t) opt_time=1 ;;
76 h|?) ME=$(basename $0)
77 cat <<-END >&2
78 Usage: $ME [gMt][-l linecount] [-p poolname] [interval [count]]
79 -M # print numbers as megabytes (base 10)
80 -t # print timestamp
81 -p poolname # only look at poolname
82 -l linecount # print header every linecount lines (default=only once)
83 interval in seconds or "txg" for transaction group commit intervals
84 note: "txg" only appropriate when -p poolname is used
85 count will limit the number of intervals reported
86
87 examples:
88 $ME # default output, 1 second samples
89 $ME 10 # 10 second samples
90 $ME 10 6 # print 6 x 10 second samples
91 $ME -p rpool # show ZIL stats for rpool only
92
93 output:
94 [TIME]
95 N-Bytes - data bytes written to ZIL over the interval
96 N-Bytes/s - data bytes/s written to ZIL over ther interval
97 N-Max-Rate - maximum data rate during any 1-second sample
98 B-Bytes - buffer bytes written to ZIL over the interval
99 B-Bytes/s - buffer bytes/s written to ZIL over ther interval
100 B-Max-Rate - maximum buffer rate during any 1-second sample
101 ops - number of synchronous iops per interval
102 <=4kB - number of synchronous iops <= 4kBytes per interval
103 4-32kB - number of synchronous iops 4-32kBytes per interval
104 >=32kB - number of synchronous iops >= 32kBytes per interval
105 note: data bytes are actual data, total bytes counts buffer size
106 END
107 exit 1
108 esac
109 done
110
111 shift $(( $OPTIND - 1 ))
112
113 ### option logic
114 if [[ "$1" > 0 ]]; then
115 interval=$1; shift
116 fi
117 if [[ "$1" > 0 ]]; then
118 count=$1; shift
119 fi
120 if (( opt_pool )); then
121 filter=1
122 fi
123
124 case "$interval" in
125 "txg")
126 if [[ $opt_pool != 1 ]]; then
127 echo "error: -p poolname option must be used for txg intervals"
128 exit 1
129 fi
130 opt_txg=1
131 interval=0
132 ;;
133 [0-9]*)
134 ;;
135 *)
136 echo "error: interval must be a number or \"txg\""
137 exit 1
138 ;;
139 esac
140
141 ##############################
142 # --- Main Program, DTrace ---
143
144 /usr/sbin/dtrace -n '
145 #pragma D option quiet
146 inline int OPT_time = '$opt_time';
147 inline int OPT_txg = '$opt_txg';
148 inline int OPT_pool = '$opt_pool';
149 inline int OPT_mega = '$opt_mega';
150 inline int INTERVAL = '$interval';
151 inline int LINES = '$lines';
152 inline int COUNTER = '$count';
153 inline int FILTER = '$filter';
154 inline string POOL = "'$pool'";
155 dtrace:::BEGIN
156 {
157 /* starting values */
158 MEGA = 1000000;
159 counts = COUNTER;
160 secs = INTERVAL;
161 interval = INTERVAL;
162 interval == 0 ? interval++ : 1;
163 line = 0;
164 last_event[""] = 0;
165 nused=0;
166 nused_max_per_sec=0;
167 nused_per_sec=0;
168 size=0;
169 size_max_per_sec=0;
170 size_per_sec=0;
171 syncops=0;
172 size_4k=0;
173 size_4k_32k=0;
174 size_32k=0;
175 OPT_txg ? printf("waiting for txg commit...\n") : 1;
176 }
177
178 /*
179 * collect info when zil_lwb_write_start fires
180 */
181 fbt::zil_lwb_write_issue:entry
182 /OPT_pool == 0 || POOL == args[0]->zl_dmu_pool->dp_spa->spa_name/
183 {
184 nused += args[1]->lwb_nused;
185 nused_per_sec += args[1]->lwb_nused;
186 size += args[1]->lwb_sz;
187 size_per_sec += args[1]->lwb_sz;
188 syncops++;
189 args[1]->lwb_sz <= 4096 ? size_4k++ : 1;
190 args[1]->lwb_sz > 4096 && args[1]->lwb_sz < 32768 ? size_4k_32k++ : 1;
191 args[1]->lwb_sz >= 32768 ? size_32k++ : 1;
192 }
193
194 /*
195 * Timer
196 */
197 profile:::tick-1sec
198 {
199 OPT_txg ? secs++ : secs--;
200 nused_per_sec > nused_max_per_sec ? nused_max_per_sec = nused_per_sec : 1;
201 nused_per_sec = 0;
202 size_per_sec > size_max_per_sec ? size_max_per_sec = size_per_sec : 1;
203 size_per_sec = 0;
204 }
205
206 /*
207 * Print header
208 */
209 profile:::tick-1sec
210 /OPT_txg == 0 && line == 0/
211 {
212 /* print optional headers */
213 OPT_time ? printf("%-20s ", "TIME") : 1;
214
215 /* print header */
216 OPT_mega ? printf("%10s %10s %10s %10s %10s %10s",
217 "N-MB", "N-MB/s", "N-Max-Rate",
218 "B-MB", "B-MB/s", "B-Max-Rate") :
219 printf("%10s %10s %10s %10s %10s %10s",
220 "N-Bytes", "N-Bytes/s", "N-Max-Rate",
221 "B-Bytes", "B-Bytes/s", "B-Max-Rate");
222 printf(" %6s %6s %6s %6s\n",
223 "ops", "<=4kB", "4-32kB", ">=32kB");
224 line = LINES;
225 }
226
227 fbt::txg_quiesce:entry
228 /OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name && line == 0/
229 {
230 OPT_time ? printf("%-20s ", "TIME") : 1;
231
232 OPT_mega ? printf("%10s %10s %10s %10s %10s %10s %10s",
233 "txg", "N-MB", "N-MB/s", "N-Max-Rate",
234 "B-MB", "B-MB/s", "B-Max-Rate") :
235 printf("%10s %10s %10s %10s %10s %10s %10s",
236 "txg", "N-Bytes", "N-Bytes/s", "N-Max-Rate",
237 "B-Bytes", "B-Bytes/s", "B-Max-Rate");
238 printf(" %6s %6s %6s %6s\n",
239 "ops", "<=4kB", "4-32kB", ">=32kB");
240 line = LINES;
241 }
242
243 /*
244 * Print Output
245 */
246 profile:::tick-1sec
247 /OPT_txg == 0 && secs == 0/
248 {
249 OPT_time ? printf("%-20Y ", walltimestamp) : 1;
250 OPT_mega ?
251 printf("%10d %10d %10d %10d %10d %10d",
252 nused/MEGA, nused/(interval*MEGA), nused_max_per_sec/MEGA,
253 size/MEGA, size/(interval*MEGA), size_max_per_sec/MEGA) :
254 printf("%10d %10d %10d %10d %10d %10d",
255 nused, nused/interval, nused_max_per_sec,
256 size, size/interval, size_max_per_sec);
257 printf(" %6d %6d %6d %6d\n",
258 syncops, size_4k, size_4k_32k, size_32k);
259 nused = 0;
260 nused_per_sec = 0;
261 nused_max_per_sec = 0;
262 size=0;
263 size_max_per_sec=0;
264 size_per_sec=0;
265 syncops=0;
266 size_4k=0;
267 size_4k_32k=0;
268 size_32k=0;
269 secs = INTERVAL;
270 counts--;
271 line--;
272 }
273
274 fbt::txg_quiesce:entry
275 /OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name/
276 {
277 secs <= 0 ? secs=1 : 1;
278 OPT_time ? printf("%-20Y ", walltimestamp) : 1;
279 OPT_mega ?
280 printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
281 nused/MEGA, nused/(secs*MEGA), nused_max_per_sec/MEGA,
282 size/MEGA, size/(secs*MEGA), size_max_per_sec/MEGA) :
283 printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
284 nused, nused/secs, nused_max_per_sec,
285 size, size/secs, size_max_per_sec);
286 printf(" %6d %6d %6d %6d\n",
287 syncops, size_4k, size_4k_32k, size_32k);
288 nused = 0;
289 nused_per_sec = 0;
290 nused_max_per_sec = 0;
291 size=0;
292 size_max_per_sec=0;
293 size_per_sec=0;
294 syncops=0;
295 size_4k=0;
296 size_4k_32k=0;
297 size_32k=0;
298 secs = 0;
299 counts--;
300 line--;
301 }
302
303 /*
304 * End of program
305 */
306 profile:::tick-1sec
307 /OPT_txg == 0 && counts == 0/
308 {
309 exit(0);
310 }
311 fbt::txg_quiesce:entry
312 /OPT_txg == 1 && counts == 0/
313 {
314 exit(0);
315 }
316 '