1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * University Copyright- Copyright (c) 1982, 1986, 1988
  31  * The Regents of the University of California
  32  * All Rights Reserved
  33  *
  34  * University Acknowledgment- Portions of this document are derived from
  35  * software developed by the University of California, Berkeley, and its
  36  * contributors.
  37  */
  38 
  39 /* LINTLIBRARY */
  40 /* PROTOLIB1 */
  41 
  42 /* NFS server */
  43 
  44 #include <sys/param.h>
  45 #include <sys/types.h>
  46 #include <sys/stat.h>
  47 #include <syslog.h>
  48 #include <tiuser.h>
  49 #include <rpc/rpc.h>
  50 #include <errno.h>
  51 #include <thread.h>
  52 #include <sys/resource.h>
  53 #include <sys/time.h>
  54 #include <sys/file.h>
  55 #include <nfs/nfs.h>
  56 #include <nfs/nfs_acl.h>
  57 #include <nfs/nfssys.h>
  58 #include <stdio.h>
  59 #include <stdio_ext.h>
  60 #include <stdlib.h>
  61 #include <signal.h>
  62 #include <netconfig.h>
  63 #include <netdir.h>
  64 #include <string.h>
  65 #include <unistd.h>
  66 #include <limits.h>
  67 #include <stropts.h>
  68 #include <sys/tihdr.h>
  69 #include <sys/wait.h>
  70 #include <poll.h>
  71 #include <priv_utils.h>
  72 #include <sys/tiuser.h>
  73 #include <netinet/tcp.h>
  74 #include <deflt.h>
  75 #include <rpcsvc/daemon_utils.h>
  76 #include <rpcsvc/nfs4_prot.h>
  77 #include <libnvpair.h>
  78 #include <libscf.h>
  79 #include <libshare.h>
  80 #include "nfs_tbind.h"
  81 #include "thrpool.h"
  82 #include "smfcfg.h"
  83 
/*
 * Lowest value of nfs_server_vers_max for which a quiesce request is sent
 * to the kernel; below this the SIGUSR1 handler only flushes and exits.
 */
#define QUIESCE_VERSMIN 4
/*
 * DSS: distributed stable storage.  Lowest value of nfs_server_vers_max
 * for which the "-s" paths are passed to dss_init().
 */
#define DSS_VERSMIN     4

/* Forward declarations for the routines defined in this file. */
static  int     nfssvc(int, struct netbuf, struct netconfig *);
static  int     nfssvcpool(int maxservers);
static  int     dss_init(uint_t npaths, char **pathnames);
static  void    dss_mkleafdirs(uint_t npaths, char **pathnames);
static  void    dss_mkleafdir(char *dir, char *leaf, char *path);
static  void    usage(void);
int             qstrcmp(const void *s1, const void *s2);

/* Entry point for the nfssys system call; defined outside this file. */
extern  int     _nfssys(int, void *);

/* Daemonization helpers; defined outside this file. */
extern int      daemonize_init(void);
extern void     daemonize_fini(int fd);

/* signal handlers */
static void sigflush(int);
static void quiesce(int);

/* Program name (av[0]) used as a prefix in messages and for openlog(). */
static  char    *MyName;
/*
 * Transport providers tried, in this order, when neither a protocol nor a
 * provider has been selected.
 */
static  NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
                                            "/dev/udp6", NULL };
/* static       NETSELDECL(defaultprotos)[] =   { NC_UDP, NC_TCP, NULL }; */
/*
 * The following are all globals used by routines in nfs_tbind.c.
 */
size_t  end_listen_fds;         /* used by conn_close_oldest() */
size_t  num_fds = 0;            /* used by multiple routines */
int     listen_backlog = 32;    /* used by bind_to_{provider,proto}() */
int     num_servers;            /* used by cots_listen_event() */
int     (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
                                /* used by cots_listen_event() */
int     max_conns_allowed = -1; /* used by cots_listen_event(); -1: unlimited */

/*
 * Keep track of min/max versions of NFS protocol to be started.
 * Start with the compiled-in defaults; main() replaces them with the
 * "server_versmin" and "server_versmax" SMF properties when those can
 * be read.
 */
int     nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
int     nfs_server_vers_max = NFS_VERSMAX_DEFAULT;

/*
 * Set the default for server delegation enablement.  main() turns it off
 * when the "server_delegation" SMF property starts with "off".
 */
int     nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
 134 
 135 int
 136 main(int ac, char *av[])
 137 {
 138         char *dir = "/";
 139         int allflag = 0;
 140         int df_allflag = 0;
 141         int opt_cnt = 0;
 142         int maxservers = 1024;  /* zero allows inifinte number of threads */
 143         int maxservers_set = 0;
 144         int logmaxservers = 0;
 145         int pid;
 146         int i;
 147         char *provider = (char *)NULL;
 148         char *df_provider = (char *)NULL;
 149         struct protob *protobp0, *protobp;
 150         NETSELDECL(proto) = NULL;
 151         NETSELDECL(df_proto) = NULL;
 152         NETSELPDECL(providerp);
 153         char *defval;
 154         boolean_t can_do_mlp;
 155         uint_t dss_npaths = 0;
 156         char **dss_pathnames = NULL;
 157         sigset_t sgset;
 158         char name[PATH_MAX], value[PATH_MAX];
 159         int ret, bufsz;
 160 
 161         int pipe_fd = -1;
 162 
 163         MyName = *av;
 164 
 165         /*
 166          * Initializations that require more privileges than we need to run.
 167          */
 168         (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
 169         svcsetprio();
 170 
 171         can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
 172         if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
 173             DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
 174             can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
 175                 (void) fprintf(stderr, "%s should be run with"
 176                     " sufficient privileges\n", av[0]);
 177                 exit(1);
 178         }
 179 
 180         /* Nfsd cannot run in a non-global zone. */
 181         if (getzoneid() != GLOBAL_ZONEID) {
 182                 (void) fprintf(stderr, "%s: can only run in the global zone\n",
 183                     av[0]);
 184                 exit(1);
 185         }
 186 
 187         (void) enable_extended_FILE_stdio(-1, -1);
 188 
 189         /*
 190          * Read in the values from SMF first before we check
 191          * command line options so the options override SMF values.
 192          */
 193         bufsz = PATH_MAX;
 194         ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE,
 195             SCF_TYPE_INTEGER, NFSD, &bufsz);
 196         if (ret == SA_OK) {
 197                 errno = 0;
 198                 max_conns_allowed = strtol(value, (char **)NULL, 10);
 199                 if (errno != 0)
 200                         max_conns_allowed = -1;
 201         }
 202 
 203         bufsz = PATH_MAX;
 204         ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE,
 205             SCF_TYPE_INTEGER, NFSD, &bufsz);
 206         if (ret == SA_OK) {
 207                 errno = 0;
 208                 listen_backlog = strtol(value, (char **)NULL, 10);
 209                 if (errno != 0) {
 210                         listen_backlog = 32;
 211                 }
 212         }
 213 
 214         bufsz = PATH_MAX;
 215         ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE,
 216             SCF_TYPE_ASTRING, NFSD, &bufsz);
 217         if ((ret == SA_OK) && strlen(value) > 0) {
 218                 df_proto = strdup(value);
 219                 opt_cnt++;
 220                 if (strncasecmp("ALL", value, 3) == 0) {
 221                         free(df_proto);
 222                         df_proto = NULL;
 223                         df_allflag = 1;
 224                 }
 225         }
 226 
 227         bufsz = PATH_MAX;
 228         ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE,
 229             SCF_TYPE_ASTRING, NFSD, &bufsz);
 230         if ((ret == SA_OK) && strlen(value) > 0) {
 231                 df_provider = strdup(value);
 232                 opt_cnt++;
 233         }
 234 
 235         bufsz = PATH_MAX;
 236         ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE,
 237             SCF_TYPE_INTEGER, NFSD, &bufsz);
 238         if (ret == SA_OK) {
 239                 errno = 0;
 240                 maxservers = strtol(value, (char **)NULL, 10);
 241                 if (errno != 0)
 242                         maxservers = 1024;
 243                 else
 244                         maxservers_set = 1;
 245         }
 246 
 247         bufsz = 4;
 248         ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE,
 249             SCF_TYPE_INTEGER, NFSD, &bufsz);
 250         if (ret == SA_OK)
 251                 nfs_server_vers_min = strtol(value, (char **)NULL, 10);
 252 
 253         bufsz = 4;
 254         ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE,
 255             SCF_TYPE_INTEGER, NFSD, &bufsz);
 256         if (ret == SA_OK)
 257                 nfs_server_vers_max = strtol(value, (char **)NULL, 10);
 258 
 259         bufsz = PATH_MAX;
 260         ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE,
 261             SCF_TYPE_ASTRING, NFSD, &bufsz);
 262         if (ret == SA_OK)
 263                 if (strncasecmp(value, "off", 3) == 0)
 264                         nfs_server_delegation = FALSE;
 265 
 266         /*
 267          * Conflict options error messages.
 268          */
 269         if (opt_cnt > 1) {
 270                 (void) fprintf(stderr, "\nConflicting options, only one of "
 271                     "the following options can be specified\n"
 272                     "in SMF:\n"
 273                     "\tprotocol=ALL\n"
 274                     "\tprotocol=protocol\n"
 275                     "\tdevice=devicename\n\n");
 276                 usage();
 277         }
 278         opt_cnt = 0;
 279 
 280         while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
 281                 switch (i) {
 282                 case 'a':
 283                         free(df_proto);
 284                         df_proto = NULL;
 285                         free(df_provider);
 286                         df_provider = NULL;
 287 
 288                         allflag = 1;
 289                         opt_cnt++;
 290                         break;
 291 
 292                 case 'c':
 293                         max_conns_allowed = atoi(optarg);
 294                         break;
 295 
 296                 case 'p':
 297                         proto = optarg;
 298                         df_allflag = 0;
 299                         opt_cnt++;
 300                         break;
 301 
 302                 /*
 303                  * DSS: NFSv4 distributed stable storage.
 304                  *
 305                  * This is a Contracted Project Private interface, for
 306                  * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
 307                  */
 308                 case 's':
 309                         if (strlen(optarg) < MAXPATHLEN) {
 310                                 /* first "-s" option encountered? */
 311                                 if (dss_pathnames == NULL) {
 312                                         /*
 313                                          * Allocate maximum possible space
 314                                          * required given cmdline arg count;
 315                                          * "-s <path>" consumes two args.
 316                                          */
 317                                         size_t sz = (ac / 2) * sizeof (char *);
 318                                         dss_pathnames = (char **)malloc(sz);
 319                                         if (dss_pathnames == NULL) {
 320                                                 (void) fprintf(stderr, "%s: "
 321                                                     "dss paths malloc failed\n",
 322                                                     av[0]);
 323                                                 exit(1);
 324                                         }
 325                                         (void) memset(dss_pathnames, 0, sz);
 326                                 }
 327                                 dss_pathnames[dss_npaths] = optarg;
 328                                 dss_npaths++;
 329                         } else {
 330                                 (void) fprintf(stderr,
 331                                     "%s: -s pathname too long.\n", av[0]);
 332                         }
 333                         break;
 334 
 335                 case 't':
 336                         provider = optarg;
 337                         df_allflag = 0;
 338                         opt_cnt++;
 339                         break;
 340 
 341                 case 'l':
 342                         listen_backlog = atoi(optarg);
 343                         break;
 344 
 345                 case '?':
 346                         usage();
 347                         /* NOTREACHED */
 348                 }
 349         }
 350 
 351         allflag = df_allflag;
 352         if (proto == NULL)
 353                 proto = df_proto;
 354         if (provider == NULL)
 355                 provider = df_provider;
 356 
 357         /*
 358          * Conflict options error messages.
 359          */
 360         if (opt_cnt > 1) {
 361                 (void) fprintf(stderr, "\nConflicting options, only one of "
 362                     "the following options can be specified\n"
 363                     "on the command line:\n"
 364                     "\t-a\n"
 365                     "\t-p protocol\n"
 366                     "\t-t transport\n\n");
 367                 usage();
 368         }
 369 
 370         if (proto != NULL &&
 371             strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
 372                 if (nfs_server_vers_max == NFS_V4) {
 373                         if (nfs_server_vers_min == NFS_V4) {
 374                                 fprintf(stderr,
 375                                     "NFS version 4 is not supported "
 376                                     "with the UDP protocol.  Exiting\n");
 377                                 exit(3);
 378                         } else {
 379                                 fprintf(stderr,
 380                                     "NFS version 4 is not supported "
 381                                     "with the UDP protocol.\n");
 382                         }
 383                 }
 384         }
 385 
 386         /*
 387          * If there is exactly one more argument, it is the number of
 388          * servers.
 389          */
 390         if (optind == ac - 1) {
 391                 maxservers = atoi(av[optind]);
 392                 maxservers_set = 1;
 393         }
 394         /*
 395          * If there are two or more arguments, then this is a usage error.
 396          */
 397         else if (optind < ac - 1)
 398                 usage();
 399         /*
 400          * Check the ranges for min/max version specified
 401          */
 402         else if ((nfs_server_vers_min > nfs_server_vers_max) ||
 403             (nfs_server_vers_min < NFS_VERSMIN) ||
 404             (nfs_server_vers_max > NFS_VERSMAX))
 405                 usage();
 406         /*
 407          * There are no additional arguments, and we haven't set maxservers
 408          * explicitly via the config file, we use a default number of
 409          * servers.  We will log this.
 410          */
 411         else if (maxservers_set == 0)
 412                 logmaxservers = 1;
 413 
 414         /*
 415          * Basic Sanity checks on options
 416          *
 417          * max_conns_allowed must be positive, except for the special
 418          * value of -1 which is used internally to mean unlimited, -1 isn't
 419          * documented but we allow it anyway.
 420          *
 421          * maxservers must be positive
 422          * listen_backlog must be positive or zero
 423          */
 424         if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
 425             (listen_backlog < 0) || (maxservers <= 0)) {
 426                 usage();
 427         }
 428 
 429         /*
 430          * Set current dir to server root
 431          */
 432         if (chdir(dir) < 0) {
 433                 (void) fprintf(stderr, "%s:  ", MyName);
 434                 perror(dir);
 435                 exit(1);
 436         }
 437 
 438 #ifndef DEBUG
 439         pipe_fd = daemonize_init();
 440 #endif
 441 
 442         openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
 443 
 444         /*
 445          * establish our lock on the lock file and write our pid to it.
 446          * exit if some other process holds the lock, or if there's any
 447          * error in writing/locking the file.
 448          */
 449         pid = _enter_daemon_lock(NFSD);
 450         switch (pid) {
 451         case 0:
 452                 break;
 453         case -1:
 454                 fprintf(stderr, "error locking for %s: %s\n", NFSD,
 455                     strerror(errno));
 456                 exit(2);
 457         default:
 458                 /* daemon was already running */
 459                 exit(0);
 460         }
 461 
 462         /*
 463          * If we've been given a list of paths to be used for distributed
 464          * stable storage, and provided we're going to run a version
 465          * that supports it, setup the DSS paths.
 466          */
 467         if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
 468                 if (dss_init(dss_npaths, dss_pathnames) != 0) {
 469                         fprintf(stderr, "%s", "dss_init failed. Exiting.\n");
 470                         exit(1);
 471                 }
 472         }
 473 
 474         /*
 475          * Block all signals till we spawn other
 476          * threads.
 477          */
 478         (void) sigfillset(&sgset);
 479         (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
 480 
 481         if (logmaxservers) {
 482                 fprintf(stderr,
 483                     "Number of servers not specified. Using default of %d.\n",
 484                     maxservers);
 485         }
 486 
 487         /*
 488          * Make sure to unregister any previous versions in case the
 489          * user is reconfiguring the server in interesting ways.
 490          */
 491         svc_unreg(NFS_PROGRAM, NFS_VERSION);
 492         svc_unreg(NFS_PROGRAM, NFS_V3);
 493         svc_unreg(NFS_PROGRAM, NFS_V4);
 494         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
 495         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
 496 
 497         /*
 498          * Set up kernel RPC thread pool for the NFS server.
 499          */
 500         if (nfssvcpool(maxservers)) {
 501                 fprintf(stderr, "Can't set up kernel NFS service: %s. "
 502                     "Exiting.\n", strerror(errno));
 503                 exit(1);
 504         }
 505 
 506         /*
 507          * Set up blocked thread to do LWP creation on behalf of the kernel.
 508          */
 509         if (svcwait(NFS_SVCPOOL_ID)) {
 510                 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n",
 511                     strerror(errno));
 512                 exit(1);
 513         }
 514 
 515         /*
 516          * RDMA start and stop thread.
 517          * Per pool RDMA listener creation and
 518          * destructor thread.
 519          *
 520          * start rdma services and block in the kernel.
 521          * (only if proto or provider is not set to TCP or UDP)
 522          */
 523         if ((proto == NULL) && (provider == NULL)) {
 524                 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
 525                     nfs_server_vers_max, nfs_server_delegation)) {
 526                         fprintf(stderr,
 527                             "Can't set up RDMA creator thread : %s\n",
 528                             strerror(errno));
 529                 }
 530         }
 531 
 532         /*
 533          * Now open up for signal delivery
 534          */
 535 
 536         (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
 537         sigset(SIGTERM, sigflush);
 538         sigset(SIGUSR1, quiesce);
 539 
 540         /*
 541          * Build a protocol block list for registration.
 542          */
 543         protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
 544         protobp->serv = "NFS";
 545         protobp->versmin = nfs_server_vers_min;
 546         protobp->versmax = nfs_server_vers_max;
 547         protobp->program = NFS_PROGRAM;
 548 
 549         protobp->next = (struct protob *)malloc(sizeof (struct protob));
 550         protobp = protobp->next;
 551         protobp->serv = "NFS_ACL";           /* not used */
 552         protobp->versmin = nfs_server_vers_min;
 553         /* XXX - this needs work to get the version just right */
 554         protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
 555             NFS_ACL_V3 : nfs_server_vers_max;
 556         protobp->program = NFS_ACL_PROGRAM;
 557         protobp->next = (struct protob *)NULL;
 558 
 559         if (allflag) {
 560                 if (do_all(protobp0, nfssvc) == -1) {
 561                         fprintf(stderr, "setnetconfig failed : %s\n",
 562                             strerror(errno));
 563                         exit(1);
 564                 }
 565         } else if (proto) {
 566                 /* there's more than one match for the same protocol */
 567                 struct netconfig *nconf;
 568                 NCONF_HANDLE *nc;
 569                 bool_t  protoFound = FALSE;
 570                 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
 571                         fprintf(stderr, "setnetconfig failed : %s\n",
 572                             strerror(errno));
 573                         goto done;
 574                 }
 575                 while (nconf = getnetconfig(nc)) {
 576                         if (strcmp(nconf->nc_proto, proto) == 0) {
 577                                 protoFound = TRUE;
 578                                 do_one(nconf->nc_device, NULL,
 579                                     protobp0, nfssvc);
 580                         }
 581                 }
 582                 (void) endnetconfig(nc);
 583                 if (protoFound == FALSE) {
 584                         fprintf(stderr,
 585                             "couldn't find netconfig entry for protocol %s\n",
 586                             proto);
 587                 }
 588         } else if (provider)
 589                 do_one(provider, proto, protobp0, nfssvc);
 590         else {
 591                 for (providerp = defaultproviders;
 592                     *providerp != NULL; providerp++) {
 593                         provider = *providerp;
 594                         do_one(provider, NULL, protobp0, nfssvc);
 595                 }
 596         }
 597 done:
 598 
 599         free(protobp);
 600         free(protobp0);
 601 
 602         if (num_fds == 0) {
 603                 fprintf(stderr, "Could not start NFS service for any protocol."
 604                     " Exiting.\n");
 605                 exit(1);
 606         }
 607 
 608         end_listen_fds = num_fds;
 609 
 610         /*
 611          * nfsd is up and running as far as we are concerned.
 612          */
 613         daemonize_fini(pipe_fd);
 614 
 615         /*
 616          * Get rid of unneeded privileges.
 617          */
 618         __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
 619             PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
 620 
 621         /*
 622          * Poll for non-data control events on the transport descriptors.
 623          */
 624         poll_for_action();
 625 
 626         /*
 627          * If we get here, something failed in poll_for_action().
 628          */
 629         return (1);
 630 }
 631 
 632 static int
 633 nfssvcpool(int maxservers)
 634 {
 635         struct svcpool_args npa;
 636 
 637         npa.id = NFS_SVCPOOL_ID;
 638         npa.maxthreads = maxservers;
 639         npa.redline = 0;
 640         npa.qsize = 0;
 641         npa.timeout = 0;
 642         npa.stksize = 0;
 643         npa.max_same_xprt = 0;
 644         return (_nfssys(SVCPOOL_CREATE, &npa));
 645 }
 646 
 647 /*
 648  * Establish NFS service thread.
 649  */
 650 static int
 651 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
 652 {
 653         struct nfs_svc_args nsa;
 654 
 655         nsa.fd = fd;
 656         nsa.netid = nconf->nc_netid;
 657         nsa.addrmask = addrmask;
 658         if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
 659                 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
 660                     NFS_V3 : nfs_server_vers_max;
 661                 nsa.versmin = nfs_server_vers_min;
 662                 /*
 663                  * If no version left, silently do nothing, previous
 664                  * checks will have assured at least TCP is available.
 665                  */
 666                 if (nsa.versmin > nsa.versmax)
 667                         return (0);
 668         } else {
 669                 nsa.versmax = nfs_server_vers_max;
 670                 nsa.versmin = nfs_server_vers_min;
 671         }
 672         nsa.delegation = nfs_server_delegation;
 673         return (_nfssys(NFS_SVC, &nsa));
 674 }
 675 
 676 static void
 677 usage(void)
 678 {
 679         (void) fprintf(stderr,
 680 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
 681         (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
 682         (void) fprintf(stderr,
 683 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
 684         (void) fprintf(stderr,
 685 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
 686         (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
 687         (void) fprintf(stderr, "> zero,\n");
 688         (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
 689         (void) fprintf(stderr,
 690             "\ttransport is a transport provider name (i.e. device),\n");
 691         (void) fprintf(stderr,
 692             "\tlisten_backlog is the TCP listen backlog,\n");
 693         (void) fprintf(stderr,
 694             "\tand <nservers> must be a decimal number > zero.\n");
 695         exit(1);
 696 }
 697 
 698 /*
 699  * Issue nfssys system call to flush all logging buffers asynchronously.
 700  *
 701  * NOTICE: It is extremely important to flush NFS logging buffers when
 702  *         nfsd exits. When the system is halted or rebooted nfslogd
 703  *         may not have an opportunity to flush the buffers.
 704  */
 705 static void
 706 nfsl_flush()
 707 {
 708         struct nfsl_flush_args nfa;
 709 
 710         memset((void *)&nfa, 0, sizeof (nfa));
 711         nfa.version = NFSL_FLUSH_ARGS_VERS;
 712         nfa.directive = NFSL_ALL;       /* flush all asynchronously */
 713 
 714         if (_nfssys(LOG_FLUSH, &nfa) < 0)
 715                 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
 716                     strerror(errno));
 717 }
 718 
 719 /*
 720  * SIGTERM handler.
 721  * Flush logging buffers and exit.
 722  */
 723 static void
 724 sigflush(int sig)
 725 {
 726         nfsl_flush();
 727         _exit(0);
 728 }
 729 
 730 /*
 731  * SIGUSR1 handler.
 732  *
 733  * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
 734  *
 735  * This is a Contracted Project Private interface, for the sole use
 736  * of Sun Cluster HA-NFS. See PSARC/2004/497.
 737  *
 738  * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
 739  */
 740 static void
 741 quiesce(int sig)
 742 {
 743         int error;
 744         int id = NFS_SVCPOOL_ID;
 745 
 746         if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
 747                 /* Request server quiesce at next shutdown */
 748                 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
 749 
 750                 /*
 751                  * ENOENT is returned if there is no matching SVC pool
 752                  * for the id. Possibly because the pool is not yet setup.
 753                  * In this case, just exit as if no error. For all other errors,
 754                  * just return and allow caller to retry.
 755                  */
 756                 if (error && errno != ENOENT) {
 757                         syslog(LOG_ERR,
 758                             "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
 759                             strerror(errno));
 760                         return;
 761                 }
 762         }
 763 
 764         /* Flush logging buffers */
 765         nfsl_flush();
 766 
 767         _exit(0);
 768 }
 769 
 770 /*
 771  * DSS: distributed stable storage.
 772  * Create leaf directories as required, keeping an eye on path
 773  * lengths. Calls exit(1) on failure.
 774  * The pathnames passed in must already exist, and must be writeable by nfsd.
 775  * Note: the leaf directories under NFS4_VAR_DIR are not created here;
 776  * they're created at pkg install.
 777  */
 778 static void
 779 dss_mkleafdirs(uint_t npaths, char **pathnames)
 780 {
 781         int i;
 782         char *tmppath = NULL;
 783 
 784         /*
 785          * Create the temporary storage used by dss_mkleafdir() here,
 786          * rather than in that function, so that it only needs to be
 787          * done once, rather than once for each call. Too big to put
 788          * on the function's stack.
 789          */
 790         tmppath = (char *)malloc(MAXPATHLEN);
 791         if (tmppath == NULL) {
 792                 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
 793                 exit(1);
 794         }
 795 
 796         for (i = 0; i < npaths; i++) {
 797                 char *p = pathnames[i];
 798 
 799                 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
 800                 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
 801         }
 802 
 803         free(tmppath);
 804 }
 805 
 806 /*
 807  * Create "leaf" in "dir" (which must already exist).
 808  * leaf: should start with a '/'
 809  */
 810 static void
 811 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
 812 {
 813         /* MAXPATHLEN includes the terminating NUL */
 814         if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
 815                 fprintf(stderr, "stable storage path too long: %s%s. "
 816                     "Exiting.\n", dir, leaf);
 817                 exit(1);
 818         }
 819 
 820         (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
 821 
 822         /* the directory may already exist: that's OK */
 823         if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
 824                 fprintf(stderr, "error creating stable storage directory: "
 825                     "%s: %s. Exiting.\n", strerror(errno), tmppath);
 826                 exit(1);
 827         }
 828 }
 829 
 830 /*
 831  * Create the storage dirs, and pass the path list to the kernel.
 832  * This requires the nfssrv module to be loaded; the _nfssys() syscall
 833  * will fail ENOTSUP if it is not.
 834  * Use libnvpair(3LIB) to pass the data to the kernel.
 835  */
 836 static int
 837 dss_init(uint_t npaths, char **pathnames)
 838 {
 839         int i, j, nskipped, error;
 840         char *bufp;
 841         uint32_t bufsize;
 842         size_t buflen;
 843         nvlist_t *nvl;
 844 
 845         if (npaths > 1) {
 846                 /*
 847                  * We need to remove duplicate paths; this might be user error
 848                  * in the general case, but HA-NFSv4 can also cause this.
 849                  * Sort the pathnames array, and NULL out duplicates,
 850                  * then write the non-NULL entries to a new array.
 851                  * Sorting will also allow the kernel to optimise its searches.
 852                  */
 853 
 854                 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
 855 
 856                 /* now NULL out any duplicates */
 857                 i = 0; j = 1; nskipped = 0;
 858                 while (j < npaths) {
 859                         if (strcmp(pathnames[i], pathnames[j]) == 0) {
 860                                 pathnames[j] = NULL;
 861                                 j++;
 862                                 nskipped++;
 863                                 continue;
 864                         }
 865 
 866                         /* skip i over any of its NULLed duplicates */
 867                         i = j++;
 868                 }
 869 
 870                 /* finally, write the non-NULL entries to a new array */
 871                 if (nskipped > 0) {
 872                         int nreal;
 873                         size_t sz;
 874                         char **tmp_pathnames;
 875 
 876                         nreal = npaths - nskipped;
 877 
 878                         sz = nreal * sizeof (char *);
 879                         tmp_pathnames = (char **)malloc(sz);
 880                         if (tmp_pathnames == NULL) {
 881                                 fprintf(stderr, "tmp_pathnames malloc "
 882                                     "failed\n");
 883                                 exit(1);
 884                         }
 885 
 886                         for (i = 0, j = 0; i < npaths; i++)
 887                                 if (pathnames[i] != NULL)
 888                                         tmp_pathnames[j++] = pathnames[i];
 889                         free(pathnames);
 890                         pathnames = tmp_pathnames;
 891                         npaths = nreal;
 892                 }
 893 
 894         }
 895 
 896         /* Create directories to store the distributed state files */
 897         dss_mkleafdirs(npaths, pathnames);
 898 
 899         /* Create the name-value pair list */
 900         error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 901         if (error) {
 902                 fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno));
 903                 return (1);
 904         }
 905 
 906         /* Add the pathnames array as a single name-value pair */
 907         error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
 908             pathnames, npaths);
 909         if (error) {
 910                 fprintf(stderr, "nvlist_add_string_array failed: %s\n",
 911                     strerror(errno));
 912                 nvlist_free(nvl);
 913                 return (1);
 914         }
 915 
 916         /*
 917          * Pack list into contiguous memory, for passing to kernel.
 918          * nvlist_pack() will allocate the memory for the buffer,
 919          * which we should free() when no longer needed.
 920          * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
 921          */
 922         bufp = NULL;
 923         error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
 924         if (error) {
 925                 fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno));
 926                 nvlist_free(nvl);
 927                 return (1);
 928         }
 929 
 930         /* Now we have the packed buffer, we no longer need the list */
 931         nvlist_free(nvl);
 932 
 933         /*
 934          * Let the kernel know in advance how big the buffer is.
 935          * NOTE: we cannot just pass buflen, since size_t is a long, and
 936          * thus a different size between ILP32 userland and LP64 kernel.
 937          * Use an int for the transfer, since that should be big enough;
 938          * this is a no-op at the moment, here, since nfsd is 32-bit, but
 939          * that could change.
 940          */
 941         bufsize = (uint32_t)buflen;
 942         error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
 943         if (error) {
 944                 fprintf(stderr,
 945                     "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n",
 946                     strerror(errno));
 947                 free(bufp);
 948                 return (1);
 949         }
 950 
 951         /* Pass the packed buffer to the kernel */
 952         error = _nfssys(NFS4_DSS_SETPATHS, bufp);
 953         if (error) {
 954                 fprintf(stderr,
 955                     "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno));
 956                 free(bufp);
 957                 return (1);
 958         }
 959 
 960         /*
 961          * The kernel has now unpacked the buffer and extracted the
 962          * pathnames array, we no longer need the buffer.
 963          */
 964         free(bufp);
 965 
 966         return (0);
 967 }
 968 
 969 /*
 970  * Quick sort string compare routine, for qsort.
 971  * Needed to make arg types correct.
 972  */
 973 int
 974 qstrcmp(const void *p1, const void *p2)
 975 {
 976         char *s1 = *((char **)p1);
 977         char *s2 = *((char **)p2);
 978 
 979         return (strcmp(s1, s2));
 980 }