1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  26 /*        All Rights Reserved   */
  27 
  28 /*
  29  * University Copyright- Copyright (c) 1982, 1986, 1988
  30  * The Regents of the University of California
  31  * All Rights Reserved
  32  *
  33  * University Acknowledgment- Portions of this document are derived from
  34  * software developed by the University of California, Berkeley, and its
  35  * contributors.
  36  */
  37 
  38 /* LINTLIBRARY */
  39 /* PROTOLIB1 */
  40 
  41 /* NFS server */
  42 
  43 #include <sys/param.h>
  44 #include <sys/types.h>
  45 #include <sys/stat.h>
  46 #include <syslog.h>
  47 #include <tiuser.h>
  48 #include <rpc/rpc.h>
  49 #include <errno.h>
  50 #include <thread.h>
  51 #include <sys/resource.h>
  52 #include <sys/time.h>
  53 #include <sys/file.h>
  54 #include <nfs/nfs.h>
  55 #include <nfs/nfs_acl.h>
  56 #include <nfs/nfssys.h>
  57 #include <stdio.h>
  58 #include <stdio_ext.h>
  59 #include <stdlib.h>
  60 #include <signal.h>
  61 #include <netconfig.h>
  62 #include <netdir.h>
  63 #include <string.h>
  64 #include <unistd.h>
  65 #include <limits.h>
  66 #include <stropts.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/wait.h>
  69 #include <poll.h>
  70 #include <priv_utils.h>
  71 #include <sys/tiuser.h>
  72 #include <netinet/tcp.h>
  73 #include <deflt.h>
  74 #include <rpcsvc/daemon_utils.h>
  75 #include <rpcsvc/nfs4_prot.h>
  76 #include <libnvpair.h>
  77 #include <libscf.h>
  78 #include <libshare.h>
  79 #include "nfs_tbind.h"
  80 #include "thrpool.h"
  81 #include "smfcfg.h"
  82 
/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
#define QUIESCE_VERSMIN 4
/* DSS: distributed stable storage */
#define DSS_VERSMIN     4

/*
 * Forward declarations for routines defined later in this file.
 * nfssvc() is also installed as the Mysvc callback below; qstrcmp() is the
 * qsort() comparison routine used by dss_init().
 */
static  int     nfssvc(int, struct netbuf, struct netconfig *);
static  int     nfssvcpool(int maxservers);
static  int     dss_init(uint_t npaths, char **pathnames);
static  void    dss_mkleafdirs(uint_t npaths, char **pathnames);
static  void    dss_mkleafdir(char *dir, char *leaf, char *path);
static  void    usage(void);
int             qstrcmp(const void *s1, const void *s2);

/* nfssys system call entry point; not defined in this file. */
extern  int     _nfssys(int, void *);

/*
 * Daemonization helpers, not defined in this file.  main() hands the value
 * returned by daemonize_init() to daemonize_fini() once the service is up.
 */
extern int      daemonize_init(void);
extern void     daemonize_fini(int fd);

/* signal handlers */
static void sigflush(int);
static void quiesce(int);

/* Program name (av[0]), used as a prefix for messages and by openlog(). */
static  char    *MyName;
/*
 * Transport devices that main() tries, in this order, when neither "all",
 * a protocol nor a provider has been selected.
 */
static  NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
                                            "/dev/udp6", NULL };
/* static       NETSELDECL(defaultprotos)[] =   { NC_UDP, NC_TCP, NULL }; */
/*
 * The following are all globals used by routines in nfs_tbind.c.
 */
size_t  end_listen_fds;         /* used by conn_close_oldest() */
size_t  num_fds = 0;            /* used by multiple routines */
int     listen_backlog = 32;    /* used by bind_to_{provider,proto}() */
int     num_servers;            /* used by cots_listen_event() */
int     (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
                                /* used by cots_listen_event() */
int     max_conns_allowed = -1; /* used by cots_listen_event() */

/*
 * Keep track of min/max versions of NFS protocol to be started.
 * Start with the defaults (min == 2, max == 3).  We have the
 * capability of starting vers=4 but only if the user requests it.
 * main() overrides these from the SMF properties "server_versmin" and
 * "server_versmax" when they can be read.
 */
int     nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
int     nfs_server_vers_max = NFS_VERSMAX_DEFAULT;

/*
 * Default for server delegation enablement.  main() clears it when the
 * SMF property "server_delegation" is present and starts with "off".
 */
int     nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
 133 
 134 int
 135 main(int ac, char *av[])
 136 {
 137         char *dir = "/";
 138         int allflag = 0;
 139         int df_allflag = 0;
 140         int opt_cnt = 0;
 141         int maxservers = 1024;  /* zero allows inifinte number of threads */
 142         int maxservers_set = 0;
 143         int logmaxservers = 0;
 144         int pid;
 145         int i;
 146         char *provider = (char *)NULL;
 147         char *df_provider = (char *)NULL;
 148         struct protob *protobp0, *protobp;
 149         NETSELDECL(proto) = NULL;
 150         NETSELDECL(df_proto) = NULL;
 151         NETSELPDECL(providerp);
 152         char *defval;
 153         boolean_t can_do_mlp;
 154         uint_t dss_npaths = 0;
 155         char **dss_pathnames = NULL;
 156         sigset_t sgset;
 157         char name[PATH_MAX], value[PATH_MAX];
 158         int ret, bufsz;
 159 
 160         int pipe_fd = -1;
 161 
 162         MyName = *av;
 163 
 164         /*
 165          * Initializations that require more privileges than we need to run.
 166          */
 167         (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
 168         svcsetprio();
 169 
 170         can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
 171         if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
 172             DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
 173             can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
 174                 (void) fprintf(stderr, "%s should be run with"
 175                     " sufficient privileges\n", av[0]);
 176                 exit(1);
 177         }
 178 
 179         (void) enable_extended_FILE_stdio(-1, -1);
 180 
 181         /*
 182          * Read in the values from SMF first before we check
 183          * command line options so the options override SMF values.
 184          */
 185         bufsz = PATH_MAX;
 186         ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE,
 187             SCF_TYPE_INTEGER, NFSD, &bufsz);
 188         if (ret == SA_OK) {
 189                 errno = 0;
 190                 max_conns_allowed = strtol(value, (char **)NULL, 10);
 191                 if (errno != 0)
 192                         max_conns_allowed = -1;
 193         }
 194 
 195         bufsz = PATH_MAX;
 196         ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE,
 197             SCF_TYPE_INTEGER, NFSD, &bufsz);
 198         if (ret == SA_OK) {
 199                 errno = 0;
 200                 listen_backlog = strtol(value, (char **)NULL, 10);
 201                 if (errno != 0) {
 202                         listen_backlog = 32;
 203                 }
 204         }
 205 
 206         bufsz = PATH_MAX;
 207         ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE,
 208             SCF_TYPE_ASTRING, NFSD, &bufsz);
 209         if ((ret == SA_OK) && strlen(value) > 0) {
 210                 df_proto = strdup(value);
 211                 opt_cnt++;
 212                 if (strncasecmp("ALL", value, 3) == 0) {
 213                         free(df_proto);
 214                         df_proto = NULL;
 215                         df_allflag = 1;
 216                 }
 217         }
 218 
 219         bufsz = PATH_MAX;
 220         ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE,
 221             SCF_TYPE_ASTRING, NFSD, &bufsz);
 222         if ((ret == SA_OK) && strlen(value) > 0) {
 223                 df_provider = strdup(value);
 224                 opt_cnt++;
 225         }
 226 
 227         bufsz = PATH_MAX;
 228         ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE,
 229             SCF_TYPE_INTEGER, NFSD, &bufsz);
 230         if (ret == SA_OK) {
 231                 errno = 0;
 232                 maxservers = strtol(value, (char **)NULL, 10);
 233                 if (errno != 0)
 234                         maxservers = 1024;
 235                 else
 236                         maxservers_set = 1;
 237         }
 238 
 239         bufsz = 4;
 240         ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE,
 241             SCF_TYPE_INTEGER, NFSD, &bufsz);
 242         if (ret == SA_OK)
 243                 nfs_server_vers_min = strtol(value, (char **)NULL, 10);
 244 
 245         bufsz = 4;
 246         ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE,
 247             SCF_TYPE_INTEGER, NFSD, &bufsz);
 248         if (ret == SA_OK)
 249                 nfs_server_vers_max = strtol(value, (char **)NULL, 10);
 250 
 251         bufsz = PATH_MAX;
 252         ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE,
 253             SCF_TYPE_ASTRING, NFSD, &bufsz);
 254         if (ret == SA_OK)
 255                 if (strncasecmp(value, "off", 3) == 0)
 256                         nfs_server_delegation = FALSE;
 257 
 258         /*
 259          * Conflict options error messages.
 260          */
 261         if (opt_cnt > 1) {
 262                 (void) fprintf(stderr, "\nConflicting options, only one of "
 263                     "the following options can be specified\n"
 264                     "in SMF:\n"
 265                     "\tprotocol=ALL\n"
 266                     "\tprotocol=protocol\n"
 267                     "\tdevice=devicename\n\n");
 268                 usage();
 269         }
 270         opt_cnt = 0;
 271 
 272         while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
 273                 switch (i) {
 274                 case 'a':
 275                         free(df_proto);
 276                         df_proto = NULL;
 277                         free(df_provider);
 278                         df_provider = NULL;
 279 
 280                         allflag = 1;
 281                         opt_cnt++;
 282                         break;
 283 
 284                 case 'c':
 285                         max_conns_allowed = atoi(optarg);
 286                         break;
 287 
 288                 case 'p':
 289                         proto = optarg;
 290                         df_allflag = 0;
 291                         opt_cnt++;
 292                         break;
 293 
 294                 /*
 295                  * DSS: NFSv4 distributed stable storage.
 296                  *
 297                  * This is a Contracted Project Private interface, for
 298                  * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
 299                  */
 300                 case 's':
 301                         if (strlen(optarg) < MAXPATHLEN) {
 302                                 /* first "-s" option encountered? */
 303                                 if (dss_pathnames == NULL) {
 304                                         /*
 305                                          * Allocate maximum possible space
 306                                          * required given cmdline arg count;
 307                                          * "-s <path>" consumes two args.
 308                                          */
 309                                         size_t sz = (ac / 2) * sizeof (char *);
 310                                         dss_pathnames = (char **)malloc(sz);
 311                                         if (dss_pathnames == NULL) {
 312                                                 (void) fprintf(stderr, "%s: "
 313                                                     "dss paths malloc failed\n",
 314                                                     av[0]);
 315                                                 exit(1);
 316                                         }
 317                                         (void) memset(dss_pathnames, 0, sz);
 318                                 }
 319                                 dss_pathnames[dss_npaths] = optarg;
 320                                 dss_npaths++;
 321                         } else {
 322                                 (void) fprintf(stderr,
 323                                     "%s: -s pathname too long.\n", av[0]);
 324                         }
 325                         break;
 326 
 327                 case 't':
 328                         provider = optarg;
 329                         df_allflag = 0;
 330                         opt_cnt++;
 331                         break;
 332 
 333                 case 'l':
 334                         listen_backlog = atoi(optarg);
 335                         break;
 336 
 337                 case '?':
 338                         usage();
 339                         /* NOTREACHED */
 340                 }
 341         }
 342 
 343         allflag = df_allflag;
 344         if (proto == NULL)
 345                 proto = df_proto;
 346         if (provider == NULL)
 347                 provider = df_provider;
 348 
 349         /*
 350          * Conflict options error messages.
 351          */
 352         if (opt_cnt > 1) {
 353                 (void) fprintf(stderr, "\nConflicting options, only one of "
 354                     "the following options can be specified\n"
 355                     "on the command line:\n"
 356                     "\t-a\n"
 357                     "\t-p protocol\n"
 358                     "\t-t transport\n\n");
 359                 usage();
 360         }
 361 
 362         if (proto != NULL &&
 363             strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
 364                 if (nfs_server_vers_max == NFS_V4) {
 365                         if (nfs_server_vers_min == NFS_V4) {
 366                                 fprintf(stderr,
 367                                     "NFS version 4 is not supported "
 368                                     "with the UDP protocol.  Exiting\n");
 369                                 exit(3);
 370                         } else {
 371                                 fprintf(stderr,
 372                                     "NFS version 4 is not supported "
 373                                     "with the UDP protocol.\n");
 374                         }
 375                 }
 376         }
 377 
 378         /*
 379          * If there is exactly one more argument, it is the number of
 380          * servers.
 381          */
 382         if (optind == ac - 1) {
 383                 maxservers = atoi(av[optind]);
 384                 maxservers_set = 1;
 385         }
 386         /*
 387          * If there are two or more arguments, then this is a usage error.
 388          */
 389         else if (optind < ac - 1)
 390                 usage();
 391         /*
 392          * Check the ranges for min/max version specified
 393          */
 394         else if ((nfs_server_vers_min > nfs_server_vers_max) ||
 395             (nfs_server_vers_min < NFS_VERSMIN) ||
 396             (nfs_server_vers_max > NFS_VERSMAX))
 397                 usage();
 398         /*
 399          * There are no additional arguments, and we haven't set maxservers
 400          * explicitly via the config file, we use a default number of
 401          * servers.  We will log this.
 402          */
 403         else if (maxservers_set == 0)
 404                 logmaxservers = 1;
 405 
 406         /*
 407          * Basic Sanity checks on options
 408          *
 409          * max_conns_allowed must be positive, except for the special
 410          * value of -1 which is used internally to mean unlimited, -1 isn't
 411          * documented but we allow it anyway.
 412          *
 413          * maxservers must be positive
 414          * listen_backlog must be positive or zero
 415          */
 416         if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
 417             (listen_backlog < 0) || (maxservers <= 0)) {
 418                 usage();
 419         }
 420 
 421         /*
 422          * Set current dir to server root
 423          */
 424         if (chdir(dir) < 0) {
 425                 (void) fprintf(stderr, "%s:  ", MyName);
 426                 perror(dir);
 427                 exit(1);
 428         }
 429 
 430 #ifndef DEBUG
 431         pipe_fd = daemonize_init();
 432 #endif
 433 
 434         openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
 435 
 436         /*
 437          * establish our lock on the lock file and write our pid to it.
 438          * exit if some other process holds the lock, or if there's any
 439          * error in writing/locking the file.
 440          */
 441         pid = _enter_daemon_lock(NFSD);
 442         switch (pid) {
 443         case 0:
 444                 break;
 445         case -1:
 446                 fprintf(stderr, "error locking for %s: %s\n", NFSD,
 447                     strerror(errno));
 448                 exit(2);
 449         default:
 450                 /* daemon was already running */
 451                 exit(0);
 452         }
 453 
 454         /*
 455          * If we've been given a list of paths to be used for distributed
 456          * stable storage, and provided we're going to run a version
 457          * that supports it, setup the DSS paths.
 458          */
 459         if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
 460                 if (dss_init(dss_npaths, dss_pathnames) != 0) {
 461                         fprintf(stderr, "%s", "dss_init failed. Exiting.\n");
 462                         exit(1);
 463                 }
 464         }
 465 
 466         /*
 467          * Block all signals till we spawn other
 468          * threads.
 469          */
 470         (void) sigfillset(&sgset);
 471         (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
 472 
 473         if (logmaxservers) {
 474                 fprintf(stderr,
 475                     "Number of servers not specified. Using default of %d.\n",
 476                     maxservers);
 477         }
 478 
 479         /*
 480          * Make sure to unregister any previous versions in case the
 481          * user is reconfiguring the server in interesting ways.
 482          */
 483         svc_unreg(NFS_PROGRAM, NFS_VERSION);
 484         svc_unreg(NFS_PROGRAM, NFS_V3);
 485         svc_unreg(NFS_PROGRAM, NFS_V4);
 486         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
 487         svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
 488 
 489         /*
 490          * Set up kernel RPC thread pool for the NFS server.
 491          */
 492         if (nfssvcpool(maxservers)) {
 493                 fprintf(stderr, "Can't set up kernel NFS service: %s. "
 494                     "Exiting.\n", strerror(errno));
 495                 exit(1);
 496         }
 497 
 498         /*
 499          * Set up blocked thread to do LWP creation on behalf of the kernel.
 500          */
 501         if (svcwait(NFS_SVCPOOL_ID)) {
 502                 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n",
 503                     strerror(errno));
 504                 exit(1);
 505         }
 506 
 507         /*
 508          * RDMA start and stop thread.
 509          * Per pool RDMA listener creation and
 510          * destructor thread.
 511          *
 512          * start rdma services and block in the kernel.
 513          * (only if proto or provider is not set to TCP or UDP)
 514          */
 515         if ((proto == NULL) && (provider == NULL)) {
 516                 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
 517                     nfs_server_vers_max, nfs_server_delegation)) {
 518                         fprintf(stderr,
 519                             "Can't set up RDMA creator thread : %s\n",
 520                             strerror(errno));
 521                 }
 522         }
 523 
 524         /*
 525          * Now open up for signal delivery
 526          */
 527 
 528         (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
 529         sigset(SIGTERM, sigflush);
 530         sigset(SIGUSR1, quiesce);
 531 
 532         /*
 533          * Build a protocol block list for registration.
 534          */
 535         protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
 536         protobp->serv = "NFS";
 537         protobp->versmin = nfs_server_vers_min;
 538         protobp->versmax = nfs_server_vers_max;
 539         protobp->program = NFS_PROGRAM;
 540 
 541         protobp->next = (struct protob *)malloc(sizeof (struct protob));
 542         protobp = protobp->next;
 543         protobp->serv = "NFS_ACL";           /* not used */
 544         protobp->versmin = nfs_server_vers_min;
 545         /* XXX - this needs work to get the version just right */
 546         protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
 547             NFS_ACL_V3 : nfs_server_vers_max;
 548         protobp->program = NFS_ACL_PROGRAM;
 549         protobp->next = (struct protob *)NULL;
 550 
 551         if (allflag) {
 552                 if (do_all(protobp0, nfssvc) == -1) {
 553                         fprintf(stderr, "setnetconfig failed : %s\n",
 554                             strerror(errno));
 555                         exit(1);
 556                 }
 557         } else if (proto) {
 558                 /* there's more than one match for the same protocol */
 559                 struct netconfig *nconf;
 560                 NCONF_HANDLE *nc;
 561                 bool_t  protoFound = FALSE;
 562                 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
 563                         fprintf(stderr, "setnetconfig failed : %s\n",
 564                             strerror(errno));
 565                         goto done;
 566                 }
 567                 while (nconf = getnetconfig(nc)) {
 568                         if (strcmp(nconf->nc_proto, proto) == 0) {
 569                                 protoFound = TRUE;
 570                                 do_one(nconf->nc_device, NULL,
 571                                     protobp0, nfssvc);
 572                         }
 573                 }
 574                 (void) endnetconfig(nc);
 575                 if (protoFound == FALSE) {
 576                         fprintf(stderr,
 577                             "couldn't find netconfig entry for protocol %s\n",
 578                             proto);
 579                 }
 580         } else if (provider)
 581                 do_one(provider, proto, protobp0, nfssvc);
 582         else {
 583                 for (providerp = defaultproviders;
 584                     *providerp != NULL; providerp++) {
 585                         provider = *providerp;
 586                         do_one(provider, NULL, protobp0, nfssvc);
 587                 }
 588         }
 589 done:
 590 
 591         free(protobp);
 592         free(protobp0);
 593 
 594         if (num_fds == 0) {
 595                 fprintf(stderr, "Could not start NFS service for any protocol."
 596                     " Exiting.\n");
 597                 exit(1);
 598         }
 599 
 600         end_listen_fds = num_fds;
 601 
 602         /*
 603          * nfsd is up and running as far as we are concerned.
 604          */
 605         daemonize_fini(pipe_fd);
 606 
 607         /*
 608          * Get rid of unneeded privileges.
 609          */
 610         __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
 611             PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
 612 
 613         /*
 614          * Poll for non-data control events on the transport descriptors.
 615          */
 616         poll_for_action();
 617 
 618         /*
 619          * If we get here, something failed in poll_for_action().
 620          */
 621         return (1);
 622 }
 623 
 624 static int
 625 nfssvcpool(int maxservers)
 626 {
 627         struct svcpool_args npa;
 628 
 629         npa.id = NFS_SVCPOOL_ID;
 630         npa.maxthreads = maxservers;
 631         npa.redline = 0;
 632         npa.qsize = 0;
 633         npa.timeout = 0;
 634         npa.stksize = 0;
 635         npa.max_same_xprt = 0;
 636         return (_nfssys(SVCPOOL_CREATE, &npa));
 637 }
 638 
 639 /*
 640  * Establish NFS service thread.
 641  */
 642 static int
 643 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
 644 {
 645         struct nfs_svc_args nsa;
 646 
 647         nsa.fd = fd;
 648         nsa.netid = nconf->nc_netid;
 649         nsa.addrmask = addrmask;
 650         if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
 651                 nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
 652                     NFS_V3 : nfs_server_vers_max;
 653                 nsa.versmin = nfs_server_vers_min;
 654                 /*
 655                  * If no version left, silently do nothing, previous
 656                  * checks will have assured at least TCP is available.
 657                  */
 658                 if (nsa.versmin > nsa.versmax)
 659                         return (0);
 660         } else {
 661                 nsa.versmax = nfs_server_vers_max;
 662                 nsa.versmin = nfs_server_vers_min;
 663         }
 664         nsa.delegation = nfs_server_delegation;
 665         return (_nfssys(NFS_SVC, &nsa));
 666 }
 667 
 668 static void
 669 usage(void)
 670 {
 671         (void) fprintf(stderr,
 672 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
 673         (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
 674         (void) fprintf(stderr,
 675 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
 676         (void) fprintf(stderr,
 677 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
 678         (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
 679         (void) fprintf(stderr, "> zero,\n");
 680         (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
 681         (void) fprintf(stderr,
 682             "\ttransport is a transport provider name (i.e. device),\n");
 683         (void) fprintf(stderr,
 684             "\tlisten_backlog is the TCP listen backlog,\n");
 685         (void) fprintf(stderr,
 686             "\tand <nservers> must be a decimal number > zero.\n");
 687         exit(1);
 688 }
 689 
 690 /*
 691  * Issue nfssys system call to flush all logging buffers asynchronously.
 692  *
 693  * NOTICE: It is extremely important to flush NFS logging buffers when
 694  *         nfsd exits. When the system is halted or rebooted nfslogd
 695  *         may not have an opportunity to flush the buffers.
 696  */
 697 static void
 698 nfsl_flush()
 699 {
 700         struct nfsl_flush_args nfa;
 701 
 702         memset((void *)&nfa, 0, sizeof (nfa));
 703         nfa.version = NFSL_FLUSH_ARGS_VERS;
 704         nfa.directive = NFSL_ALL;       /* flush all asynchronously */
 705 
 706         if (_nfssys(LOG_FLUSH, &nfa) < 0)
 707                 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
 708                     strerror(errno));
 709 }
 710 
 711 /*
 712  * SIGTERM handler.
 713  * Flush logging buffers and exit.
 714  */
 715 static void
 716 sigflush(int sig)
 717 {
 718         nfsl_flush();
 719         _exit(0);
 720 }
 721 
 722 /*
 723  * SIGUSR1 handler.
 724  *
 725  * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
 726  *
 727  * This is a Contracted Project Private interface, for the sole use
 728  * of Sun Cluster HA-NFS. See PSARC/2004/497.
 729  *
 730  * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
 731  */
 732 static void
 733 quiesce(int sig)
 734 {
 735         int error;
 736         int id = NFS_SVCPOOL_ID;
 737 
 738         if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
 739                 /* Request server quiesce at next shutdown */
 740                 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
 741 
 742                 /*
 743                  * ENOENT is returned if there is no matching SVC pool
 744                  * for the id. Possibly because the pool is not yet setup.
 745                  * In this case, just exit as if no error. For all other errors,
 746                  * just return and allow caller to retry.
 747                  */
 748                 if (error && errno != ENOENT) {
 749                         syslog(LOG_ERR,
 750                             "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
 751                             strerror(errno));
 752                         return;
 753                 }
 754         }
 755 
 756         /* Flush logging buffers */
 757         nfsl_flush();
 758 
 759         _exit(0);
 760 }
 761 
 762 /*
 763  * DSS: distributed stable storage.
 764  * Create leaf directories as required, keeping an eye on path
 765  * lengths. Calls exit(1) on failure.
 766  * The pathnames passed in must already exist, and must be writeable by nfsd.
 767  * Note: the leaf directories under NFS4_VAR_DIR are not created here;
 768  * they're created at pkg install.
 769  */
 770 static void
 771 dss_mkleafdirs(uint_t npaths, char **pathnames)
 772 {
 773         int i;
 774         char *tmppath = NULL;
 775 
 776         /*
 777          * Create the temporary storage used by dss_mkleafdir() here,
 778          * rather than in that function, so that it only needs to be
 779          * done once, rather than once for each call. Too big to put
 780          * on the function's stack.
 781          */
 782         tmppath = (char *)malloc(MAXPATHLEN);
 783         if (tmppath == NULL) {
 784                 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
 785                 exit(1);
 786         }
 787 
 788         for (i = 0; i < npaths; i++) {
 789                 char *p = pathnames[i];
 790 
 791                 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
 792                 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
 793         }
 794 
 795         free(tmppath);
 796 }
 797 
 798 /*
 799  * Create "leaf" in "dir" (which must already exist).
 800  * leaf: should start with a '/'
 801  */
 802 static void
 803 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
 804 {
 805         /* MAXPATHLEN includes the terminating NUL */
 806         if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
 807                 fprintf(stderr, "stable storage path too long: %s%s. "
 808                     "Exiting.\n", dir, leaf);
 809                 exit(1);
 810         }
 811 
 812         (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
 813 
 814         /* the directory may already exist: that's OK */
 815         if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
 816                 fprintf(stderr, "error creating stable storage directory: "
 817                     "%s: %s. Exiting.\n", strerror(errno), tmppath);
 818                 exit(1);
 819         }
 820 }
 821 
 822 /*
 823  * Create the storage dirs, and pass the path list to the kernel.
 824  * This requires the nfssrv module to be loaded; the _nfssys() syscall
 825  * will fail ENOTSUP if it is not.
 826  * Use libnvpair(3LIB) to pass the data to the kernel.
 827  */
 828 static int
 829 dss_init(uint_t npaths, char **pathnames)
 830 {
 831         int i, j, nskipped, error;
 832         char *bufp;
 833         uint32_t bufsize;
 834         size_t buflen;
 835         nvlist_t *nvl;
 836 
 837         if (npaths > 1) {
 838                 /*
 839                  * We need to remove duplicate paths; this might be user error
 840                  * in the general case, but HA-NFSv4 can also cause this.
 841                  * Sort the pathnames array, and NULL out duplicates,
 842                  * then write the non-NULL entries to a new array.
 843                  * Sorting will also allow the kernel to optimise its searches.
 844                  */
 845 
 846                 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
 847 
 848                 /* now NULL out any duplicates */
 849                 i = 0; j = 1; nskipped = 0;
 850                 while (j < npaths) {
 851                         if (strcmp(pathnames[i], pathnames[j]) == NULL) {
 852                                 pathnames[j] = NULL;
 853                                 j++;
 854                                 nskipped++;
 855                                 continue;
 856                         }
 857 
 858                         /* skip i over any of its NULLed duplicates */
 859                         i = j++;
 860                 }
 861 
 862                 /* finally, write the non-NULL entries to a new array */
 863                 if (nskipped > 0) {
 864                         int nreal;
 865                         size_t sz;
 866                         char **tmp_pathnames;
 867 
 868                         nreal = npaths - nskipped;
 869 
 870                         sz = nreal * sizeof (char *);
 871                         tmp_pathnames = (char **)malloc(sz);
 872                         if (tmp_pathnames == NULL) {
 873                                 fprintf(stderr, "tmp_pathnames malloc "
 874                                     "failed\n");
 875                                 exit(1);
 876                         }
 877 
 878                         for (i = 0, j = 0; i < npaths; i++)
 879                                 if (pathnames[i] != NULL)
 880                                         tmp_pathnames[j++] = pathnames[i];
 881                         free(pathnames);
 882                         pathnames = tmp_pathnames;
 883                         npaths = nreal;
 884                 }
 885 
 886         }
 887 
 888         /* Create directories to store the distributed state files */
 889         dss_mkleafdirs(npaths, pathnames);
 890 
 891         /* Create the name-value pair list */
 892         error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 893         if (error) {
 894                 fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno));
 895                 return (1);
 896         }
 897 
 898         /* Add the pathnames array as a single name-value pair */
 899         error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
 900             pathnames, npaths);
 901         if (error) {
 902                 fprintf(stderr, "nvlist_add_string_array failed: %s\n",
 903                     strerror(errno));
 904                 nvlist_free(nvl);
 905                 return (1);
 906         }
 907 
 908         /*
 909          * Pack list into contiguous memory, for passing to kernel.
 910          * nvlist_pack() will allocate the memory for the buffer,
 911          * which we should free() when no longer needed.
 912          * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
 913          */
 914         bufp = NULL;
 915         error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
 916         if (error) {
 917                 fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno));
 918                 nvlist_free(nvl);
 919                 return (1);
 920         }
 921 
 922         /* Now we have the packed buffer, we no longer need the list */
 923         nvlist_free(nvl);
 924 
 925         /*
 926          * Let the kernel know in advance how big the buffer is.
 927          * NOTE: we cannot just pass buflen, since size_t is a long, and
 928          * thus a different size between ILP32 userland and LP64 kernel.
 929          * Use an int for the transfer, since that should be big enough;
 930          * this is a no-op at the moment, here, since nfsd is 32-bit, but
 931          * that could change.
 932          */
 933         bufsize = (uint32_t)buflen;
 934         error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
 935         if (error) {
 936                 fprintf(stderr,
 937                     "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n",
 938                     strerror(errno));
 939                 free(bufp);
 940                 return (1);
 941         }
 942 
 943         /* Pass the packed buffer to the kernel */
 944         error = _nfssys(NFS4_DSS_SETPATHS, bufp);
 945         if (error) {
 946                 fprintf(stderr,
 947                     "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno));
 948                 free(bufp);
 949                 return (1);
 950         }
 951 
 952         /*
 953          * The kernel has now unpacked the buffer and extracted the
 954          * pathnames array, we no longer need the buffer.
 955          */
 956         free(bufp);
 957 
 958         return (0);
 959 }
 960 
 961 /*
 962  * Quick sort string compare routine, for qsort.
 963  * Needed to make arg types correct.
 964  */
 965 int
 966 qstrcmp(const void *p1, const void *p2)
 967 {
 968         char *s1 = *((char **)p1);
 969         char *s2 = *((char **)p2);
 970 
 971         return (strcmp(s1, s2));
 972 }