1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
14 */
15
16 /*
17 * (SMB1/SMB2) Server-level Oplock support.
18 *
19 * Conceptually, this is a separate layer on top of the
20 * file system (FS) layer oplock code in smb_cmn_oplock.c.
21 * If these layers were more distinct, the FS layer would
22 * need to use call-back functions (installed from here)
23 * to "indicate an oplock break to the server" (see below).
24 * As these layers are all in the same kernel module, the
25 * delivery of these break indications just uses a direct
26 * function call to smb_oplock_ind_break() below.
27 *
28 * This layer is responsible for handling the break indication,
29 * which often requires scheduling a taskq job in the server,
30 * and sending an oplock break message to the client using
31 * the appropriate protocol for the open handle affected.
32 *
33 * The details of composing an oplock break message, the
34 * protocol-specific details of requesting an oplock, and
35 * returning that oplock to the client are in the files:
36 * smb_oplock.c, smb2_oplock.c, smb2_lease.c
37 */
38
39 #include <smbsrv/smb2_kproto.h>
40 #include <smbsrv/smb_oplock.h>
41
/*
 * Compile-time sanity checks: every BREAK_TO_... flag must be the
 * matching ..._CACHING flag shifted left by BREAK_SHIFT.  The code
 * that sets the BREAK_TO_... bits further down relies on this.
 */
#if (READ_CACHING << BREAK_SHIFT) != BREAK_TO_READ_CACHING
#error "BREAK_TO_READ_CACHING"
#endif
#if (HANDLE_CACHING << BREAK_SHIFT) != BREAK_TO_HANDLE_CACHING
#error "BREAK_TO_HANDLE_CACHING"
#endif
#if (WRITE_CACHING << BREAK_SHIFT) != BREAK_TO_WRITE_CACHING
#error "BREAK_TO_WRITE_CACHING"
#endif

/* Mask of all three cache bits: Read, Write, Handle. */
#define	CACHE_RWH	(READ_CACHING | WRITE_CACHING | HANDLE_CACHING)
56
/*
 * Ack timeout (in milliseconds): how long the thread that sent an
 * oplock break waits for the client's response before it gives up
 * and breaks the oplock locally.
 */
int smb_oplock_timeout_ack = 30 * 1000;	/* mSec. */
63
/*
 * Default wait timeout (in milliseconds), used by threads that have
 * just finished some kind of oplock request and must now wait for
 * (possibly several) breaks to complete.
 *
 * Keep this at least a couple of seconds LONGER than the ack timeout
 * above, so that I/O callers do not give up waiting before the local
 * ack timeout has had a chance to fire.
 */
int smb_oplock_timeout_def = 45 * 1000;	/* mSec. */
73
74 static void smb_oplock_async_break(void *);
75 static void smb_oplock_hdl_clear(smb_ofile_t *);
76
77
78 /*
79 * 2.1.5.17.3 Indicating an Oplock Break to the Server
80 *
81 * The inputs for indicating an oplock break to the server are:
82 *
83 * BreakingOplockOpen: The Open used to request the oplock
84 * that is now breaking.
85 * NewOplockLevel: The type of oplock the requested oplock
86 * has been broken to. Valid values are as follows:
87 * LEVEL_NONE (that is, no oplock)
88 * LEVEL_TWO
89 * A combination of one or more of the following flags:
90 * READ_CACHING
91 * HANDLE_CACHING
92 * WRITE_CACHING
93 * AcknowledgeRequired: A Boolean value; TRUE if the server
94 * MUST acknowledge the oplock break, FALSE if not,
95 * as specified in section 2.1.5.18.
96 * OplockCompletionStatus: The NTSTATUS code to return to the server.
97 *
98 * This algorithm simply represents the completion of an oplock request,
99 * as specified in section 2.1.5.17.1 or section 2.1.5.17.2. The server
100 * is expected to associate the return status from this algorithm with
101 * BreakingOplockOpen, which is the Open passed in when it requested
102 * the oplock that is now breaking.
103 *
104 * It is important to note that because several oplocks can be outstanding
105 * in parallel, although this algorithm represents the completion of an
106 * oplock request, it might not result in the completion of the algorithm
107 * that called it. In particular, calling this algorithm will result in
108 * completion of the caller only if BreakingOplockOpen is the same as the
109 * Open with which the calling algorithm was itself called. To mitigate
110 * confusion, each algorithm that refers to this section will specify
111 * whether that algorithm's operation terminates at that point or not.
112 *
113 * The object store MUST return OplockCompletionStatus,
114 * AcknowledgeRequired, and NewOplockLevel to the server (the algorithm is
115 * as specified in section 2.1.5.17.1 and section 2.1.5.17.2).
116 *
117 * Implementation:
118 *
119 * We use two versions of this function:
120 * smb_oplock_ind_break_in_ack
121 * smb_oplock_ind_break
122 *
123 * The first is used when we're handling an Oplock Break Ack.
124 * The second is used when other operations cause a break,
125 * generally in one of the smb_oplock_break_... functions.
126 *
127 * Note that these are call-back functions that may be called with the
128 * node ofile list rwlock held and the node oplock mutex entered, so
129 * these should ONLY schedule oplock break work, and MUST NOT attempt
130 * any actions that might require either of those locks.
131 */
132
133 /*
134 * smb_oplock_ind_break_in_ack
135 *
136 * Variant of smb_oplock_ind_break() for the oplock Ack handler.
137 * When we need to indicate another oplock break from within the
138 * Ack handler (during the Ack. of some previous oplock break)
139 * we need to make sure this new break indication goes out only
140 * AFTER the reply to the current break ack. is sent out.
141 *
142 * In this case, we always have an SR (the break ack) so we can
143 * append the "ind break" work to the current SR and let the
144 * request hander thread do this work after the reply is sent.
145 * Note: this is always an SMB2 or later request, because this
146 * only happens for "granular" oplocks, which are SMB2-only.
147 *
148 * This is mostly the same as smb_oplock_ind_break() except:
149 * - The only CompletionStatus possible is STATUS_CANT_GRANT.
150 * - Instead of taskq_dispatch this appends the new SR to
151 * the "post work" queue on the current SR.
152 *
153 * Note called with the node ofile list rwlock held and
154 * the oplock mutex entered.
155 */
156 void
157 smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile,
158 uint32_t NewLevel, boolean_t AckRequired)
159 {
160 smb_request_t *new_sr;
161
162 /*
163 * This should happen only with SMB2 or later,
164 * but in case that ever changes...
165 */
166 if (ack_sr->session->dialect < SMB_VERS_2_BASE) {
167 smb_oplock_ind_break(ofile, NewLevel,
168 AckRequired, STATUS_CANT_GRANT);
169 return;
170 }
171
172 /*
173 * We're going to schedule a request that will have a
174 * reference to this ofile. Get the hold first.
175 */
176 if (!smb_ofile_hold_olbrk(ofile)) {
177 /* It's closing (or whatever). Nothing to do. */
178 return;
179 }
180
181 /*
182 * When called from Ack processing, we want to use a
183 * request on the session doing the ack. If we can't
184 * allocate a request on that session (because it's
185 * now disconnecting) just fall-back to the normal
186 * oplock break code path which deals with that.
187 * Once we have a request on the ack session, that
188 * session won't go away until the request is done.
189 */
190 new_sr = smb_request_alloc(ack_sr->session, 0);
191 if (new_sr == NULL) {
192 smb_oplock_ind_break(ofile, NewLevel,
193 AckRequired, STATUS_CANT_GRANT);
194 smb_ofile_release(ofile);
195 return;
196 }
197
198 new_sr->sr_state = SMB_REQ_STATE_SUBMITTED;
199 new_sr->smb2_async = B_TRUE;
200 new_sr->user_cr = zone_kcred();
201 new_sr->fid_ofile = ofile;
202 /* Leave tid_tree, uid_user NULL. */
203 new_sr->arg.olbrk.NewLevel = NewLevel;
204 new_sr->arg.olbrk.AckRequired = AckRequired;
205
206 /*
207 * Using smb2_cmd_code to indicate what to call.
208 * work func. will call smb_oplock_send_brk
209 */
210 new_sr->smb2_cmd_code = SMB2_OPLOCK_BREAK;
211 smb2sr_append_postwork(ack_sr, new_sr);
212 }
213
214 /*
215 * smb_oplock_ind_break
216 *
217 * This is the function described in [MS-FSA] 2.1.5.17.3
218 * which is called many places in the oplock break code.
219 *
220 * Schedule a request & taskq job to do oplock break work
221 * as requested by the FS-level code (smb_cmn_oplock.c).
222 *
223 * Note called with the node ofile list rwlock held and
224 * the oplock mutex entered.
225 */
226 void
227 smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel,
228 boolean_t AckRequired, uint32_t CompletionStatus)
229 {
230 smb_server_t *sv = ofile->f_server;
231 smb_request_t *sr = NULL;
232
233 /*
234 * See notes at smb_oplock_async_break re. CompletionStatus
235 * Check for any invalid codes here, so assert happens in
236 * the thread passing an unexpected value.
237 * The real work happens in a taskq job.
238 */
239 switch (CompletionStatus) {
240
241 case NT_STATUS_SUCCESS:
242 case STATUS_CANT_GRANT:
243 /* Send break via taskq job. */
244 break;
245
246 case STATUS_NEW_HANDLE:
247 case NT_STATUS_OPLOCK_HANDLE_CLOSED:
248 smb_oplock_hdl_clear(ofile);
249 return;
250
251 default:
252 ASSERT(0);
253 return;
254 }
255
256 /*
257 * We're going to schedule a request that will have a
258 * reference to this ofile. Get the hold first.
259 */
260 if (!smb_ofile_hold_olbrk(ofile)) {
261 /* It's closing (or whatever). Nothing to do. */
262 return;
263 }
264
265 /*
266 * We need a request allocated on the session that owns
267 * this ofile in order to safely send on that session.
268 *
269 * Note that while we hold a ref. on the ofile, it's
270 * f_session will not change. An ofile in state
271 * _ORPHANED will have f_session == NULL, but the
272 * f_session won't _change_ while we have a ref,
273 * and won't be torn down under our feet.
274 *
275 * If f_session is NULL, or it's in a state that doesn't
276 * allow new requests, use the special "server" session.
277 */
278 if (ofile->f_session != NULL)
279 sr = smb_request_alloc(ofile->f_session, 0);
280 if (sr == NULL)
281 sr = smb_request_alloc(sv->sv_session, 0);
282
283 sr->sr_state = SMB_REQ_STATE_SUBMITTED;
284 sr->smb2_async = B_TRUE;
285 sr->user_cr = zone_kcred();
286 sr->fid_ofile = ofile;
287 /* Leave tid_tree, uid_user NULL. */
288 sr->arg.olbrk.NewLevel = NewLevel;
289 sr->arg.olbrk.AckRequired = AckRequired;
290 sr->smb2_status = CompletionStatus;
291
292 (void) taskq_dispatch(
293 sv->sv_worker_pool,
294 smb_oplock_async_break, sr, TQ_SLEEP);
295 }
296
297 /*
298 * smb_oplock_async_break
299 *
300 * Called via the taskq to handle an asynchronous oplock break.
301 * We have a hold on the ofile, which will be released in
302 * smb_request_free (via sr->fid_ofile)
303 *
304 * Note we have: sr->uid_user == NULL, sr->tid_tree == NULL.
305 * Nothing called here needs those.
306 *
307 * Note that NewLevel as provided by the FS up-call does NOT
308 * include the GRANULAR flag. The SMB level is expected to
309 * keep track of how each oplock was acquired (by lease or
310 * traditional oplock request) and put the GRANULAR flag
311 * back into the oplock state when calling down to the
312 * FS-level code. Also note that the lease break message
313 * carries only the cache flags, not the GRANULAR flag.
314 */
315 static void
316 smb_oplock_async_break(void *arg)
317 {
318 smb_request_t *sr = arg;
319 uint32_t CompletionStatus;
320
321 SMB_REQ_VALID(sr);
322
323 CompletionStatus = sr->smb2_status;
324 sr->smb2_status = NT_STATUS_SUCCESS;
325
326 mutex_enter(&sr->sr_mutex);
327 sr->sr_worker = curthread;
328 sr->sr_state = SMB_REQ_STATE_ACTIVE;
329 mutex_exit(&sr->sr_mutex);
330
331 /*
332 * Note that the CompletionStatus from the FS level
333 * (smb_cmn_oplock.c) encodes what kind of action we
334 * need to take at the SMB level.
335 */
336 switch (CompletionStatus) {
337
338 case STATUS_CANT_GRANT:
339 case NT_STATUS_SUCCESS:
340 smb_oplock_send_brk(sr);
341 break;
342
343 default:
344 /* Checked by caller. */
345 ASSERT(0);
346 break;
347 }
348
349 if (sr->dh_nvl_dirty) {
350 sr->dh_nvl_dirty = B_FALSE;
351 smb2_dh_update_nvfile(sr);
352 }
353
354 sr->sr_state = SMB_REQ_STATE_COMPLETED;
355 smb_request_free(sr);
356 }
357
#if defined(DEBUG)
/*
 * Debug aid: when set > 0, smb_oplock_send_brk first waits this many
 * mSec. for the break ack and enters the debugger if it has not
 * arrived by then.
 */
int smb_oplock_debug_wait = 0;
#endif	/* DEBUG */
361
362 /*
363 * Send an oplock break over the wire, or if we can't,
364 * then process the oplock break locally.
365 *
366 * Note that we have sr->fid_ofile here but all the other
367 * normal sr members are NULL: uid_user, tid_tree.
368 * Also sr->session may or may not be the same session as
369 * the ofile came from (ofile->f_session) depending on
370 * whether this is a "live" open or an orphaned DH,
371 * where ofile->f_session will be NULL.
372 *
373 * Given that we don't always have a session, we determine
374 * the oplock type (lease etc) from f_oplock.og_dialect.
375 */
376 void
377 smb_oplock_send_brk(smb_request_t *sr)
378 {
379 smb_ofile_t *ofile;
380 smb_lease_t *lease;
381 uint32_t NewLevel;
382 boolean_t AckReq;
383 uint32_t status;
384 int rc;
385
386 ofile = sr->fid_ofile;
387 NewLevel = sr->arg.olbrk.NewLevel;
388 AckReq = sr->arg.olbrk.AckRequired;
389 lease = ofile->f_lease;
390
391 /*
392 * Build the break message in sr->reply.
393 * It's free'd in smb_request_free().
394 * Also updates the lease and NewLevel.
395 */
396 sr->reply.max_bytes = MLEN;
397 if (ofile->f_oplock.og_dialect >= SMB_VERS_2_BASE) {
398 if (lease != NULL) {
399 /*
400 * Oplock state has changed, so
401 * update the epoch.
402 */
403 mutex_enter(&lease->ls_mutex);
404 lease->ls_epoch++;
405 mutex_exit(&lease->ls_mutex);
406
407 /* Note, needs "old" state in og_state */
408 smb2_lease_break_notification(sr,
409 (NewLevel & CACHE_RWH), AckReq);
410 NewLevel |= OPLOCK_LEVEL_GRANULAR;
411 } else {
412 smb2_oplock_break_notification(sr, NewLevel);
413 }
414 } else {
415 /*
416 * SMB1 clients should only get Level II oplocks if they
417 * set the capability indicating they know about them.
418 */
419 if (NewLevel == OPLOCK_LEVEL_TWO &&
420 ofile->f_oplock.og_dialect < NT_LM_0_12)
421 NewLevel = OPLOCK_LEVEL_NONE;
422 smb1_oplock_break_notification(sr, NewLevel);
423 }
424
425 /*
426 * Keep track of what we last sent to the client,
427 * preserving the GRANULAR flag (if a lease).
428 * If we're expecting an ACK, set og_breaking
429 * (and maybe lease->ls_breaking) so we can
430 * later find the ofile with breaks pending.
431 */
432 if (AckReq) {
433 uint32_t BreakTo;
434
435 if (lease != NULL) {
436 BreakTo = (NewLevel & CACHE_RWH) << BREAK_SHIFT;
437 if (BreakTo == 0)
438 BreakTo = BREAK_TO_NO_CACHING;
439 lease->ls_breaking = BreakTo;
440 } else {
441 if ((NewLevel & LEVEL_TWO_OPLOCK) != 0)
442 BreakTo = BREAK_TO_TWO;
443 else
444 BreakTo = BREAK_TO_NONE;
445 }
446 /* Will update og_state in ack. */
447 ofile->f_oplock.og_breaking = BreakTo;
448 } else {
449 if (lease != NULL)
450 lease->ls_state = NewLevel & CACHE_RWH;
451 ofile->f_oplock.og_state = NewLevel;
452
453 if (ofile->dh_persist) {
454 smb2_dh_update_oplock(sr, ofile);
455 }
456 }
457
458 /*
459 * Try to send the break message to the client.
460 * When we get to multi-channel, this is supposed to
461 * try to send on every channel before giving up.
462 */
463 if (sr->session == ofile->f_session)
464 rc = smb_session_send(sr->session, 0, &sr->reply);
465 else
466 rc = ENOTCONN;
467
468 if (rc == 0) {
469 /*
470 * OK, we were able to send the break message.
471 * If no ack. required, we're done.
472 */
473 if (!AckReq)
474 return;
475
476 /*
477 * We're expecting an ACK. Wait in this thread
478 * so we can log clients that don't respond.
479 *
480 * If debugging, may want to break after a
481 * short wait to look into why we might be
482 * holding up progress. (i.e. locks?)
483 */
484 #ifdef DEBUG
485 if (smb_oplock_debug_wait > 0) {
486 status = smb_oplock_wait_break(ofile->f_node,
487 smb_oplock_debug_wait);
488 if (status == 0)
489 return;
490 cmn_err(CE_NOTE, "clnt %s oplock break wait debug",
491 sr->session->ip_addr_str);
492 debug_enter("oplock_wait");
493 }
494 #endif
495 status = smb_oplock_wait_break(ofile->f_node,
496 smb_oplock_timeout_ack);
497 if (status == 0)
498 return;
499
500 cmn_err(CE_NOTE, "clnt %s oplock break timeout",
501 sr->session->ip_addr_str);
502 DTRACE_PROBE1(break_timeout, smb_ofile_t, ofile);
503
504 /*
505 * Will do local ack below. Note, after timeout,
506 * do a break to none or "no caching" regardless
507 * of what the passed in cache level was.
508 * That means: clear all except GRANULAR.
509 */
510 NewLevel &= OPLOCK_LEVEL_GRANULAR;
511 } else {
512 /*
513 * We were unable to send the oplock break request.
514 * Generally, that means we have no connection to this
515 * client right now, and this ofile will have state
516 * SMB_OFILE_STATE_ORPHANED. We either close the handle
517 * or break the oplock locally, in which case the client
518 * gets the updated oplock state when they reconnect.
519 * Decide whether to keep or close.
520 *
521 * Relevant [MS-SMB2] sections:
522 *
523 * 3.3.4.6 Object Store Indicates an Oplock Break
524 * If Open.Connection is NULL, Open.IsResilient is FALSE,
525 * Open.IsDurable is FALSE and Open.IsPersistent is FALSE,
526 * the server SHOULD close the Open as specified in...
527 *
528 * 3.3.4.7 Object Store Indicates a Lease Break
529 * If Open.Connection is NULL, the server MUST close the
530 * Open as specified in ... for the following cases:
531 * - Open.IsResilient is FALSE, Open.IsDurable is FALSE,
532 * and Open.IsPersistent is FALSE.
533 * - Lease.BreakToLeaseState does not contain
534 * ...HANDLE_CACHING and Open.IsDurable is TRUE.
535 * If Lease.LeaseOpens is empty, (... local ack to "none").
536 */
537
538 /*
539 * See similar logic in smb_ofile_should_save
540 */
541 switch (ofile->dh_vers) {
542 case SMB2_RESILIENT:
543 break; /* keep DH */
544
545 case SMB2_DURABLE_V2:
546 if (ofile->dh_persist)
547 break; /* keep DH */
548 /* FALLTHROUGH */
549 case SMB2_DURABLE_V1:
550 /* IS durable (v1 or v2) */
551 if ((NewLevel & (OPLOCK_LEVEL_BATCH |
552 OPLOCK_LEVEL_CACHE_HANDLE)) != 0)
553 break; /* keep DH */
554 /* FALLTHROUGH */
555 case SMB2_NOT_DURABLE:
556 default:
557 smb_ofile_close(ofile, 0);
558 return;
559 }
560 /* Keep this ofile (durable handle). */
561
562 if (!AckReq) {
563 /* Nothing more to do. */
564 return;
565 }
566 }
567
568 /*
569 * We get here after either an oplock break ack timeout,
570 * or a send failure for a durable handle type that we
571 * preserve rather than just close. Do local ack.
572 */
573 ofile->f_oplock.og_breaking = 0;
574 if (lease != NULL)
575 lease->ls_breaking = 0;
576
577 status = smb_oplock_ack_break(sr, ofile, &NewLevel);
578 if (status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS) {
579 /* Not expecting this status return. */
580 cmn_err(CE_NOTE, "clnt local oplock ack wait?");
581 (void) smb_oplock_wait_break(ofile->f_node,
582 smb_oplock_timeout_ack);
583 status = 0;
584 }
585 if (status != 0) {
586 cmn_err(CE_NOTE, "clnt local oplock ack, "
587 "status=0x%x", status);
588 }
589
590 /* Update og_state as if we heard from the client. */
591 ofile->f_oplock.og_state = NewLevel;
592 if (lease != NULL) {
593 lease->ls_state = NewLevel & CACHE_RWH;
594 }
595
596 if (ofile->dh_persist) {
597 smb2_dh_update_oplock(sr, ofile);
598 }
599 }
600
601 /*
602 * See: NT_STATUS_OPLOCK_HANDLE_CLOSED above,
603 * and: STATUS_NEW_HANDLE
604 *
605 * The FS-level oplock layer calls this to update the
606 * SMB-level state when a handle loses its oplock.
607 */
608 static void
609 smb_oplock_hdl_clear(smb_ofile_t *ofile)
610 {
611 smb_lease_t *lease = ofile->f_lease;
612
613 if (lease != NULL) {
614 if (lease->ls_oplock_ofile == ofile) {
615 /* Last close on the lease. */
616 lease->ls_oplock_ofile = NULL;
617 }
618 }
619 ofile->f_oplock.og_state = 0;
620 ofile->f_oplock.og_breaking = 0;
621 }
622
623 /*
624 * Wait up to "timeout" mSec. for the current oplock "breaking" flags
625 * to be cleared (by smb_oplock_ack_break or smb_oplock_break_CLOSE).
626 *
627 * Callers of the above public oplock functions:
628 * smb_oplock_request()
629 * smb_oplock_ack_break()
630 * smb_oplock_break_OPEN() ...
631 * check for return status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS
632 * and call this function to wait for the break to complete.
633 *
634 * Most callers should use this default timeout, which they get
635 * by passing zero as the timeout arg. This include places where
636 * we're about to do something that invalidates some cache.
637 */
638 uint32_t
639 smb_oplock_wait_break(smb_node_t *node, int timeout) /* mSec. */
640 {
641 smb_oplock_t *ol;
642 clock_t time, rv;
643 uint32_t status = 0;
644
645 if (timeout == 0)
646 timeout = smb_oplock_timeout_def;
647
648 SMB_NODE_VALID(node);
649 ol = &node->n_oplock;
650
651 mutex_enter(&ol->ol_mutex);
652 time = MSEC_TO_TICK(timeout) + ddi_get_lbolt();
653
654 while ((ol->ol_state & BREAK_ANY) != 0) {
655 ol->waiters++;
656 rv = cv_timedwait(&ol->WaitingOpenCV,
657 &ol->ol_mutex, time);
658 ol->waiters--;
659 if (rv < 0) {
660 status = NT_STATUS_CANNOT_BREAK_OPLOCK;
661 break;
662 }
663 }
664
665 mutex_exit(&ol->ol_mutex);
666
667 return (status);
668 }