1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 #ifndef _NFS_RNODE4_H
33 #define _NFS_RNODE4_H
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38
39 #include <nfs/rnode.h> /* for symlink_cache, nfs_rwlock_t, etc. */
40 #include <nfs/nfs4.h>
41 #include <nfs/nfs4_clnt.h>
42 #include <sys/thread.h>
43 #include <sys/sysmacros.h> /* for offsetof */
44
/*
 * Stub classification stored in an rnode's r_stub_type.
 * NFS4_STUB_NONE means the rnode is not a stub.
 */
typedef enum nfs4_stub_type {
	NFS4_STUB_NONE = 0,
	NFS4_STUB_MIRRORMOUNT = 1,
	NFS4_STUB_REFERRAL = 2
} nfs4_stub_type_t;
50
/*
 * Three-valued access answer; this is the return type of
 * nfs4_access_check().
 */
typedef enum nfs4_access_type {
	NFS4_ACCESS_UNKNOWN = 0,
	NFS4_ACCESS_ALLOWED = 1,
	NFS4_ACCESS_DENIED = 2
} nfs4_access_type_t;
56
57 /*
58 * Access cache
59 */
60 typedef struct acache4_hash {
61 struct acache4 *next;
62 struct acache4 *prev;
63 krwlock_t lock;
64 } acache4_hash_t;
65
66 typedef struct acache4 {
67 struct acache4 *next; /* next and prev must be first */
68 struct acache4 *prev;
69 uint32_t known;
70 uint32_t allowed;
71 struct rnode4 *rnode;
72 cred_t *cred;
73 struct acache4 *list;
74 struct acache4_hash *hashq;
75 } acache4_t;
76
77 /*
78 * Note on the different buffer sizes in rddir4_cache:
79 * There seems to be some discrepancy between the intended and actual
80 * use of entlen and buflen, which does not correspond to the comment below.
81 * entlen - nfsv2/3 used as both alloc'd size of entries buffer and
82 * as the actual size of the entries (XXX is this correct?).
83 * nfsv4 will use it only as the alloc'd size.
84 * buflen - used for calculations of readahead.
85 * actlen - added for nfsv4 to serve as the size of the useful
86 * portion of the entries buffer. That is because in
87 * nfsv4, the otw entries are converted to system entries,
88 * and may not be the same size - thus buffer may not be full.
89 */
90 typedef struct rddir4_cache {
91 lloff_t _cookie; /* cookie used to find this cache entry */
92 lloff_t _ncookie; /* cookie used to find the next cache entry */
93 char *entries; /* buffer containing dirent entries */
94 int eof; /* EOF reached after this request */
95 int entlen; /* size of dirent entries in buf */
96 int buflen; /* size of the buffer used to store entries */
97 int actlen; /* size of the actual entries (nfsv4 only) */
98 int flags; /* control flags, see below */
99 kcondvar_t cv; /* cv for blocking */
100 int error; /* error from RPC operation */
101 void *data; /* private data */
102 } rddir4_cache;
103
104 #define nfs4_cookie _cookie._f
105 #define nfs4_ncookie _ncookie._f
106
107 /*
108 * Shadow vnode, v4 only.
109 *
110 * A file's shadow vnode list is protected by its hash bucket lock,
111 * r_hashq->r_lock.
112 *
113 * sv_r_vnode is protected by the appropriate vnode locks.
114 *
 * sv_dfh and sv_name are protected by rp->r_svlock.  (sv_dfileid and
 * sv_dfileid_valid, which this comment also used to list, are not members
 * of the svnode_t declared in this header.)
117 */
118
/*
 * Forward/backward link pair; svnode_t embeds one as its first member.
 */
struct insq_link {
	void	*forw;
	void	*back;
};
typedef struct insq_link insq_link_t;
123
124 typedef struct svnode {
125 insq_link_t sv_link; /* must be first for insque */
126 vnode_t *sv_r_vnode; /* vnode for this shadow */
127 nfs4_fname_t *sv_name; /* component name */
128 nfs4_sharedfh_t *sv_dfh; /* directory file handle */
129 } svnode_t;
130
131 #define sv_forw sv_link.forw
132 #define sv_back sv_link.back
133 extern svnode_t *vtosv(vnode_t *);
134 #define VTOSV(vp) vtosv(vp)
135 #define SVTOV(svp) (((svp)->sv_r_vnode))
136 #define IS_SHADOW(vp, rp) ((vp) != (rp)->r_vnode)
137
138 /*
139 * The format of the hash bucket used to lookup rnodes from a file handle.
140 */
141 typedef struct r4hashq {
142 struct rnode4 *r_hashf;
143 struct rnode4 *r_hashb;
144 krwlock_t r_lock;
145 } r4hashq_t;
146
147 /*
148 * Remote file information structure.
149 *
150 * The rnode is the "inode" for remote files. It contains all the
151 * information necessary to handle remote file on the client side.
152 *
153 * Note on file sizes: we keep two file sizes in the rnode: the size
154 * according to the client (r_size) and the size according to the server
155 * (r_attr.va_size). They can differ because we modify r_size during a
156 * write system call (nfs_rdwr), before the write request goes over the
157 * wire (before the file is actually modified on the server). If an OTW
158 * request occurs before the cached data is written to the server the file
159 * size returned from the server (r_attr.va_size) may not match r_size.
160 * r_size is the one we use, in general. r_attr.va_size is only used to
161 * determine whether or not our cached data is valid.
162 *
163 * Each rnode has 5 locks associated with it (not including the rnode
164 * hash table and free list locks):
165 *
166 * r_rwlock: Serializes nfs_write and nfs_setattr requests
167 * and allows nfs_read requests to proceed in parallel.
168 * Serializes reads/updates to directories.
169 *
170 * r_lkserlock: Serializes lock requests with map, write, and
171 * readahead operations.
172 *
 *	r_statelock:	Protects all fields in the rnode except for
 *			those listed below.  This lock is intended
 *			to be held for relatively short periods of
 *			time (not across entire putpage operations,
 *			for example).
178 *
179 * r_statev4_lock: Protects the created_v4 flag, the lock_owners list,
180 * and all the delegation fields except r_deleg_list.
181 *
182 * r_os_lock: Protects r_open_streams.
183 *
184 *
185 * The following members are protected by the mutex rp4freelist_lock:
186 * r_freef
187 * r_freeb
188 *
189 * The following members are protected by the hash bucket rwlock:
190 * r_hashf
191 * r_hashb
192 *
193 * r_fh is read-only except when an rnode is created (or recycled from the
194 * free list).
195 *
196 * The following members are protected by nfs4_server_t::s_lock:
197 * r_deleg_list
198 *
 * Note: r_modaddr is only accessed when the r_statelock mutex is held.
 *	Its value is also controlled via r_rwlock.  It is assumed that
 *	there will be only 1 writer active at a time, so it is safe to
 *	set r_modaddr and release r_statelock as long as the r_rwlock
 *	writer lock is held.
204 *
205 * r_inmap informs nfs4_read()/write() that there is a call to nfs4_map()
206 * in progress. nfs4_read()/write() check r_inmap to decide whether
207 * to perform directio on the file or not. r_inmap is atomically
208 * incremented in nfs4_map() before the address space routines are
209 * called and atomically decremented just before nfs4_map() exits.
210 * r_inmap is not protected by any lock.
211 *
212 * r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0
213 * while the rnode has mapped pages.
214 *
215 * 64-bit offsets: the code formerly assumed that atomic reads of
216 * r_size were safe and reliable; on 32-bit architectures, this is
217 * not true since an intervening bus cycle from another processor
218 * could update half of the size field. The r_statelock must now
219 * be held whenever any kind of access of r_size is made.
220 *
221 * Lock ordering:
222 * r_rwlock > r_lkserlock > r_os_lock > r_statelock > r_statev4_lock
223 * vnode_t::v_lock > r_os_lock
224 */
225 struct exportinfo; /* defined in nfs/export.h */
226 struct servinfo4; /* defined in nfs/nfs4_clnt.h */
227 struct failinfo; /* defined in nfs/nfs_clnt.h */
228 struct mntinfo4; /* defined in nfs/nfs4_clnt.h */
229
230 typedef struct rnode4 {
231 /* the hash fields must be first to match the rhashq_t */
232 struct rnode4 *r_hashf; /* hash queue forward pointer */
233 struct rnode4 *r_hashb; /* hash queue back pointer */
234 struct rnode4 *r_freef; /* free list forward pointer */
235 struct rnode4 *r_freeb; /* free list back pointer */
236 r4hashq_t *r_hashq; /* pointer to the hash bucket */
237
238 svnode_t r_svnode; /* "master" shadow vnode for file */
239 kmutex_t r_svlock; /* serializes access to svnode list */
240 nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */
241 nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */
242 kmutex_t r_statelock; /* protects (most of) rnode contents */
243 nfs4_sharedfh_t *r_fh; /* file handle */
244 struct servinfo4
245 *r_server; /* current server */
246 u_offset_t r_nextr; /* next byte read offset (read-ahead) */
247 uint_t r_flags; /* flags, see below */
248 short r_error; /* async write error */
249 cred_t *r_unlcred; /* unlinked credentials */
250 char *r_unlname; /* unlinked file name */
251 vnode_t *r_unldvp; /* parent dir of unlinked file */
252 vnode_t *r_xattr_dir; /* cached xattr dir vnode */
253 len_t r_size; /* client's view of file size */
254 vattr_t r_attr; /* cached vnode attributes */
255 hrtime_t r_time_attr_saved; /* time attributes were cached */
256 hrtime_t r_time_attr_inval; /* time attributes become invalid */
257 hrtime_t r_time_cache_inval; /* time caches become invalid */
258 time_t r_delay_wait; /* future time for DELAY handling */
259 int r_delay_interval; /* Number of Secs of last DELAY */
260 time_t r_last_recov; /* time of last recovery operation */
261 nfs4_recov_t r_recov_act; /* action from last recovery op */
262 long r_mapcnt; /* count of mmapped pages */
263 uint_t r_count; /* # of refs not reflect in v_count */
264 uint_t r_awcount; /* # of outstanding async write */
265 uint_t r_gcount; /* getattrs waiting to flush pages */
266 kcondvar_t r_cv; /* condvar for blocked threads */
267 int (*r_putapage) /* address of putapage routine */
268 (vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *);
269 void *r_dir; /* cache of readdir responses */
270 rddir4_cache *r_direof; /* pointer to the EOF entry */
271 symlink_cache r_symlink; /* cached readlink response */
272 verifier4 r_writeverf; /* file data write verifier */
273 u_offset_t r_modaddr; /* address for page in writerp */
274 commit_t r_commit; /* commit information */
275 u_offset_t r_truncaddr; /* base for truncate operation */
276 vsecattr_t *r_secattr; /* cached security attributes (acls) */
277 verifier4 r_cookieverf4; /* version 4 readdir cookie verifier */
278 nfs4_pathconf_info_t r_pathconf; /* cached pathconf info */
279 acache4_t *r_acache; /* list of access cache entries */
280 list_t r_open_streams; /* open streams list */
281 kmutex_t r_os_lock; /* protects r_open_streams */
282 nfs4_lock_owner_t
283 r_lo_head; /* lock owners list head */
284 int created_v4; /* 1 if file has been created in v4 */
285 kmutex_t r_statev4_lock; /* protects created_v4, state4ptr */
286
287 list_node_t r_deleg_link; /* linkage into list of */
288 /* delegated rnodes for this server */
289 open_delegation_type4
290 r_deleg_type; /* type of delegation granted */
291 stateid4 r_deleg_stateid;
292 /* delegation state id */
293 nfs_space_limit4
294 r_deleg_limit; /* file limits returned from */
295 /* server on delegated open */
296 nfsace4 r_deleg_perms; /* file permissions returned from */
297 /* server on delegated open */
298 fattr4_change r_deleg_change; /* current deleg change attr */
299 fattr4_change r_deleg_change_grant;
300 /* change @ write deleg grant */
301 cred_t *r_deleg_cred; /* credential in force when the */
302 /* delegation was granted */
303 open_delegation_type4
304 r_deleg_needs_recovery;
305 /* delegation needs recovery */
306 /* This contains the delegation type */
307 /* for use with CLAIM_PREVIOUS. */
308 /* OPEN_DELEGATE_NONE means recovery */
309 /* is not needed. */
310 unsigned r_deleg_needs_recall:1;
311 /* delegation has been recalled by */
312 /* the server during open with */
313 /* CLAIM_PREVIOUS */
314 unsigned r_deleg_return_pending:1;
315 /* delegreturn is pending, don't use */
316 /* the delegation stateid, set in */
317 /* nfs4_dlistadd */
318 unsigned r_deleg_return_inprog:1;
319 /* delegreturn is in progress, may */
320 /* only be set by nfs4delegreturn. */
321 nfs_rwlock_t r_deleg_recall_lock;
322 /* lock for synchronizing delegreturn */
323 /* with in other operations, acquired */
324 /* in read mode by nfs4_start_fop, */
325 /* acquired in write mode in */
326 /* nfs4delegreturn */
327 fattr4_change r_change; /* GETATTR4 change attr; client */
328 /* should always request change */
329 /* when c/mtime requested to keep */
330 /* change and c/mtime in sync */
331 fattr4_fileid r_mntd_fid; /* mounted on fileid attr */
332 kthread_t *r_serial; /* attrcache validation thread */
333 kthread_t *r_pgflush; /* thread flushing page cache */
334 list_t r_indelmap; /* list of delmap callers */
335 fattr4_fsid r_srv_fsid; /* fsid of srv fs containing object */
336 /* when rnode created; compare with */
337 /* sv_fsid (servinfo4_t) to see why */
338 /* stub type was set */
339 nfs4_stub_type_t r_stub_type;
340 /* e.g. mirror-mount or referral */
341 uint_t r_inmap; /* to serialize read/write and mmap */
342 } rnode4_t;
343
344 #define r_vnode r_svnode.sv_r_vnode
345
/*
 * Flags
 *
 * Bits kept in r_flags.  0x00010000 is not assigned to any flag here.
 */
#define	R4READDIRWATTR	0x00000001	/* READDIR is used with attributes */
#define	R4DIRTY		0x00000002	/* write left dirty pages behind */
#define	R4STALE		0x00000004	/* stale; do not even try to write */
#define	R4MODINPROGRESS	0x00000008	/* a page is being modified */
#define	R4TRUNCATE	0x00000010	/* truncating; do not commit */
#define	R4HAVEVERF	0x00000020	/* a write verifier is available */
#define	R4COMMIT	0x00000040	/* a commit is in progress */
#define	R4COMMITWAIT	0x00000080	/* someone waits to do a commit */
#define	R4HASHED	0x00000100	/* rnode is on the hash queues */
#define	R4OUTOFSPACE	0x00000200	/* an out of space error occurred */
#define	R4LODANGLERS	0x00000400	/* dangling lock_owners to clean up */
#define	R4WRITEMODIFIED	0x00000800	/* write has modified file data */
#define	R4DIRECTIO	0x00001000	/* bypass the buffer cache */
#define	R4RECOVERR	0x00002000	/* recovery was not possible */
#define	R4RECEXPFH	0x00004000	/* expired filehandle in recovery */
#define	R4RECOVERRP	0x00008000	/* R4RECOVERR pending, not yet set */
#define	R4ISXATTR	0x00020000	/* rnode is a named attribute */
#define	R4DELMAPLIST	0x00040000	/* delmap callers tracked for as cb */
#define	R4PGFLUSH	0x00080000	/* page flush thread is active */
#define	R4INCACHEPURGE	0x00100000	/* cache purge for file size change */
#define	R4LOOKUP	0x00200000	/* a lookup was done in this dir */
/*
 * Vnode <-> rnode conversions: an rnode names its vnode through r_vnode,
 * and a vnode's v_data is cast back to the rnode.
 */
#define	VTOR4(vp)	((rnode4_t *)((vp)->v_data))
#define	RTOV4(rp)	((rp)->r_vnode)

/* Stub tests on r_stub_type. */
#define	RP_ISSTUB(rp)		((rp)->r_stub_type != NFS4_STUB_NONE)
#define	RP_ISSTUB_MIRRORMOUNT(rp)	\
	((rp)->r_stub_type == NFS4_STUB_MIRRORMOUNT)
#define	RP_ISSTUB_REFERRAL(rp)	((rp)->r_stub_type == NFS4_STUB_REFERRAL)
379
380 /*
381 * Open file instances.
382 */
383
384 typedef struct nfs4_opinst {
385 struct nfs4_opinst *re_next; /* next in list */
386 vnode_t *re_vp; /* held reference */
387 uint32_t re_numosp; /* number of valid open streams */
388 nfs4_open_stream_t **re_osp; /* held reference */
389 } nfs4_opinst_t;
390
391 #ifdef _KERNEL
392
extern volatile long	nrnode;

/* Values used for r_delay_interval */
#define	NFS4_INITIAL_DELAY_INTERVAL	1
#define	NFS4_MAX_DELAY_INTERVAL		20

/* Codes used for check_rtable4 */
#define	NFSV4_RTABLE4_OK		0
#define	NFSV4_RTABLE4_NOT_FREE_LIST	1
#define	NFSV4_RTABLE4_DIRTY_PAGES	2
#define	NFSV4_RTABLE4_POS_R_COUNT	3
404
405 extern rnode4_t *r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *);
406 extern rnode4_t *r4find_unlocked(nfs4_sharedfh_t *, struct vfs *);
407 extern void r4flush(struct vfs *, cred_t *);
408 extern void destroy_rtable4(struct vfs *, cred_t *);
409 extern int check_rtable4(struct vfs *);
410 extern void rp4_addfree(rnode4_t *, cred_t *);
411 extern void rp4_addhash(rnode4_t *);
412 extern void rp4_rmhash(rnode4_t *);
413 extern void rp4_rmhash_locked(rnode4_t *);
414 extern int rtable4hash(nfs4_sharedfh_t *);
415
416 extern vnode_t *makenfs4node(nfs4_sharedfh_t *, nfs4_ga_res_t *, struct vfs *,
417 hrtime_t, cred_t *, vnode_t *, nfs4_fname_t *);
418 extern vnode_t *makenfs4node_by_fh(nfs4_sharedfh_t *, nfs4_sharedfh_t *,
419 nfs4_fname_t **, nfs4_ga_res_t *, mntinfo4_t *, cred_t *, hrtime_t);
420
421 extern nfs4_opinst_t *r4mkopenlist(struct mntinfo4 *);
422 extern void r4releopenlist(nfs4_opinst_t *);
423 extern int r4find_by_fsid(mntinfo4_t *, fattr4_fsid *);
424
425 /* Access cache calls */
426 extern nfs4_access_type_t nfs4_access_check(rnode4_t *, uint32_t, cred_t *);
427 extern void nfs4_access_cache(rnode4_t *rp, uint32_t, uint32_t, cred_t *);
428 extern int nfs4_access_purge_rp(rnode4_t *);
429
430 extern int nfs4_free_data_reclaim(rnode4_t *);
431 extern void nfs4_rnode_invalidate(struct vfs *);
432
433 extern time_t r2lease_time(rnode4_t *);
434 extern int nfs4_directio(vnode_t *, int, cred_t *);
435
436 /* shadow vnode functions */
437 extern void sv_activate(vnode_t **, vnode_t *, nfs4_fname_t **, int);
438 extern vnode_t *sv_find(vnode_t *, vnode_t *, nfs4_fname_t **);
439 extern void sv_update_path(vnode_t *, char *, char *);
440 extern void sv_inactive(vnode_t *);
441 extern void sv_exchange(vnode_t **);
442 extern void sv_uninit(svnode_t *);
443 extern void nfs4_clear_open_streams(rnode4_t *);
444
/*
 * Mark cached attributes as timed out.
 *
 * PURGE_ATTRCACHE4_LOCKED(rp) updates the rnode's attribute timestamps and
 * clears the two pathconf validity fields; it takes no lock itself.  Its
 * argument is evaluated more than once, so pass a side-effect-free
 * expression.
 *
 * PURGE_ATTRCACHE4(vp) converts vp with VTOR4() and performs the same
 * update between mutex_enter() and mutex_exit() on r_statelock, so the
 * caller must not be holding the rnode r_statelock mutex.
 *
 * Both macros expand to exactly one statement and must be followed by a
 * semicolon; this keeps them correct as the body of an unbraced if/else.
 * The local in PURGE_ATTRCACHE4 has a reserved-looking name so that an
 * argument expression mentioning "rp" is not captured by it.
 */
#define	PURGE_ATTRCACHE4_LOCKED(rp)					\
	do {								\
		(rp)->r_time_attr_inval = gethrtime();			\
		(rp)->r_time_attr_saved = (rp)->r_time_attr_inval;	\
		(rp)->r_pathconf.pc4_xattr_valid = 0;			\
		(rp)->r_pathconf.pc4_cache_valid = 0;			\
	} while (0)

#define	PURGE_ATTRCACHE4(vp)						\
	do {								\
		rnode4_t *purge_rp4_ = VTOR4(vp);			\
		mutex_enter(&purge_rp4_->r_statelock);			\
		PURGE_ATTRCACHE4_LOCKED(purge_rp4_);			\
		mutex_exit(&purge_rp4_->r_statelock);			\
	} while (0)
462
463
464 extern void nfs4_async_readdir(vnode_t *, rddir4_cache *,
465 cred_t *, int (*)(vnode_t *, rddir4_cache *, cred_t *));
466 extern char *rnode4info(rnode4_t *rp);
467
468 extern int writerp4(rnode4_t *, caddr_t, int, struct uio *, int);
469 extern void nfs4_set_nonvattrs(rnode4_t *, struct nfs4attr_to_vattr *);
470 extern void nfs4delegabandon(rnode4_t *);
471 extern stateid4 nfs4_get_w_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
472 nfs_opnum4, nfs4_stateid_types_t *);
473 extern stateid4 nfs4_get_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
474 nfs_opnum4, nfs4_stateid_types_t *, bool_t);
475 extern nfsstat4 nfs4_find_or_create_lock_owner(pid_t, rnode4_t *, cred_t *,
476 nfs4_open_owner_t **, nfs4_open_stream_t **,
477 nfs4_lock_owner_t **);
478 extern cred_t *nfs4_get_otw_cred_by_osp(rnode4_t *, cred_t *,
479 nfs4_open_stream_t **, bool_t *, bool_t *);
480
481
/*
 * Bits accepted in the flag argument of nfs4delegreturn
 */
#define	NFS4_DR_FORCE	0x01	/* discard even when start_op fails */
#define	NFS4_DR_PUSH	0x02	/* push modified data back to the server */
#define	NFS4_DR_DISCARD	0x04	/* drop the delegation w/o delegreturn */
#define	NFS4_DR_DID_OP	0x08	/* the caller already did nfs4_start_op */
#define	NFS4_DR_RECALL	0x10	/* delegreturn answers a CB_RECALL */
#define	NFS4_DR_REOPEN	0x20	/* reopen files, if applicable */
491
492 extern int nfs4delegreturn(rnode4_t *, int);
493 extern void nfs4_delegreturn_all(nfs4_server_t *);
494 extern void nfs4delegreturn_cleanup(rnode4_t *, nfs4_server_t *);
495 extern void nfs4_delegation_accept(rnode4_t *, open_claim_type4, OPEN4res *,
496 nfs4_ga_res_t *, cred_t *);
497
498 extern void nfs4_dlistclean(void);
499 extern void nfs4_deleg_discard(mntinfo4_t *, nfs4_server_t *);
500
501 extern void rddir4_cache_create(rnode4_t *);
502 extern void rddir4_cache_purge(rnode4_t *);
503 extern void rddir4_cache_destroy(rnode4_t *);
504 extern rddir4_cache *rddir4_cache_lookup(rnode4_t *, offset_t, int);
505 extern void rddir4_cache_rele(rnode4_t *, rddir4_cache *);
506
507 extern void r4_stub_mirrormount(rnode4_t *);
508 extern void r4_stub_referral(rnode4_t *);
509 extern void r4_stub_none(rnode4_t *);
510
511 #ifdef DEBUG
512 extern char *rddir4_cache_buf_alloc(size_t, int);
513 extern void rddir4_cache_buf_free(void *, size_t);
514 #endif
515
516
517
518 #endif /* _KERNEL */
519
520 #ifdef __cplusplus
521 }
522 #endif
523
524 #endif /* _NFS_RNODE4_H */