Print this page
    
Revert "OS-871 zone stuck in shutting_down - waiting for kernel thread nfsauth_refresh_thread to terminate"
This reverts commit 887d2a84c612cea61b6ad544f54cf790cfb9de3e.
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
          +++ new/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  24   23   */
  25   24  
  26   25  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T         */
  27   26  /*        All Rights Reserved   */
  28   27  
  29   28  /*
  30   29   * University Copyright- Copyright (c) 1982, 1986, 1988
  31   30   * The Regents of the University of California
  32   31   * All Rights Reserved
  33   32   *
  34   33   * University Acknowledgment- Portions of this document are derived from
  35   34   * software developed by the University of California, Berkeley, and its
  36   35   * contributors.
  37   36   */
  38   37  
  39   38  /* LINTLIBRARY */
  40   39  /* PROTOLIB1 */
  41   40  
  42   41  /* NFS server */
  43   42  
  44   43  #include <sys/param.h>
  45   44  #include <sys/types.h>
  46   45  #include <sys/stat.h>
  47   46  #include <syslog.h>
  48   47  #include <tiuser.h>
  49   48  #include <rpc/rpc.h>
  50   49  #include <errno.h>
  51   50  #include <thread.h>
  52   51  #include <sys/resource.h>
  53   52  #include <sys/time.h>
  54   53  #include <sys/file.h>
  55   54  #include <nfs/nfs.h>
  56   55  #include <nfs/nfs_acl.h>
  57   56  #include <nfs/nfssys.h>
  58   57  #include <stdio.h>
  59   58  #include <stdio_ext.h>
  60   59  #include <stdlib.h>
  61   60  #include <signal.h>
  62   61  #include <netconfig.h>
  63   62  #include <netdir.h>
  64   63  #include <string.h>
  65   64  #include <unistd.h>
  66   65  #include <limits.h>
  67   66  #include <stropts.h>
  68   67  #include <sys/tihdr.h>
  69   68  #include <sys/wait.h>
  70   69  #include <poll.h>
  71   70  #include <priv_utils.h>
  72   71  #include <sys/tiuser.h>
  73   72  #include <netinet/tcp.h>
  74   73  #include <deflt.h>
  75   74  #include <rpcsvc/daemon_utils.h>
  76   75  #include <rpcsvc/nfs4_prot.h>
  77   76  #include <libnvpair.h>
  78   77  #include <libscf.h>
  79   78  #include <libshare.h>
  80   79  #include "nfs_tbind.h"
  81   80  #include "thrpool.h"
  82   81  #include "smfcfg.h"
  83   82  
  84   83  /* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
  85   84  #define QUIESCE_VERSMIN 4
  86   85  /* DSS: distributed stable storage */
  87   86  #define DSS_VERSMIN     4
  88   87  
           /* Local helpers; nfssvc, nfssvcpool and usage are defined further down. */
  89   88  static  int     nfssvc(int, struct netbuf, struct netconfig *);
  90   89  static  int     nfssvcpool(int maxservers);
  91   90  static  int     dss_init(uint_t npaths, char **pathnames);
  92   91  static  void    dss_mkleafdirs(uint_t npaths, char **pathnames);
  93   92  static  void    dss_mkleafdir(char *dir, char *leaf, char *path);
  94   93  static  void    usage(void);
  95   94  int             qstrcmp(const void *s1, const void *s2);
  96   95  
  97   96  extern  int     _nfssys(int, void *);
  98   97  
  99   98  extern int      daemonize_init(void);
 100   99  extern void     daemonize_fini(int fd);
 101  100  
 102  101  /* signal handlers */
 103  102  static void sigflush(int);
 104  103  static void quiesce(int);
 105  104  
           /* Set to av[0] at the top of main(); used in diagnostics and by openlog(). */
 106  105  static  char    *MyName;
           /*
            * Transport devices main() registers on, in this order, when neither
            * "all transports", a protocol, nor a provider has been selected.
            */
 107  106  static  NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
 108  107                                              "/dev/udp6", NULL };
 109  108  /* static       NETSELDECL(defaultprotos)[] =   { NC_UDP, NC_TCP, NULL }; */
 110  109  /*
 111  110   * The following are all globals used by routines in nfs_tbind.c.
 112  111   */
 113  112  size_t  end_listen_fds;         /* used by conn_close_oldest() */
 114  113  size_t  num_fds = 0;            /* used by multiple routines */
 115  114  int     listen_backlog = 32;    /* used by bind_to_{provider,proto}() */
 116  115  int     num_servers;            /* used by cots_listen_event() */
 117  116  int     (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
 118  117                                  /* used by cots_listen_event() */
 119  118  int     max_conns_allowed = -1; /* used by cots_listen_event() */
 120  119  
 121  120  /*
 122  121   * Keep track of min/max versions of NFS protocol to be started.
 123  122   * Start with the defaults (min == 2, max == 3).  We have the
 124  123   * capability of starting vers=4 but only if the user requests it.
            *
            * main() overwrites both values from the SMF properties
            * "server_versmin" and "server_versmax" when those can be read.
 125  124   */
 126  125  int     nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
 127  126  int     nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
 128  127  
 129  128  /*
 130  129   * Set the default for server delegation enablement and set per
 131  130   * /etc/default/nfs configuration (if present).
            *
            * In the code visible in main(), the value is cleared when the SMF
            * property "server_delegation" begins with "off" (case-insensitive).
 132  131   */
 133  132  int     nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
 134  133  
           /*
            * nfsd entry point.
            *
            * In order: sets up daemon privileges, reads settings from SMF, lets
            * command-line options override them, validates the result, changes
            * directory to "/", daemonizes (non-DEBUG builds only), takes the daemon
            * lock, optionally initializes DSS paths, creates the kernel service pool
            * and its helper threads, registers NFS and NFS_ACL on the selected
            * transports, drops privileges that are no longer needed, and finally
            * calls poll_for_action().
            *
            * Returns 1 only if poll_for_action() returns.  Every other failure path
            * visible here terminates the process through exit() or usage().
            */
 135  134  int
 136  135  main(int ac, char *av[])
 137  136  {
 138  137          char *dir = "/";
 139  138          int allflag = 0;
 140  139          int df_allflag = 0;
 141  140          int opt_cnt = 0;
 142  141          int maxservers = 1024;  /* zero allows infinite number of threads */
 143  142          int maxservers_set = 0;
 144  143          int logmaxservers = 0;
 145  144          int pid;
 146  145          int i;
 147  146          char *provider = (char *)NULL;
 148  147          char *df_provider = (char *)NULL;
 149  148          struct protob *protobp0, *protobp;
 150  149          NETSELDECL(proto) = NULL;
 151  150          NETSELDECL(df_proto) = NULL;
 152  151          NETSELPDECL(providerp);
 153  152          char *defval;
 154  153          boolean_t can_do_mlp;
 155  154          uint_t dss_npaths = 0;
 156  155          char **dss_pathnames = NULL;
 157  156          sigset_t sgset;
 158  157          char name[PATH_MAX], value[PATH_MAX];
 159  158          int ret, bufsz;
 160  159  
 161  160          int pipe_fd = -1;
 162  161  
 163  162          MyName = *av;
 164  163  
 165  164          /*
 166  165           * Initializations that require more privileges than we need to run.
 167  166           */
 168  167          (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
  
    | 
      ↓ open down ↓ | 
    135 lines elided | 
    
      ↑ open up ↑ | 
  
 169  168          svcsetprio();
 170  169  
 171  170          can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
 172  171          if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
 173  172              DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
 174  173              can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
 175  174                  (void) fprintf(stderr, "%s should be run with"
 176  175                      " sufficient privileges\n", av[0]);
 177  176                  exit(1);
 178  177          }
 179      -
 180      -        /* Nfsd cannot run in a non-global zone. */
 181      -        if (getzoneid() != GLOBAL_ZONEID) {
 182      -                (void) fprintf(stderr, "%s: can only run in the global zone\n",
 183      -                    av[0]);
 184      -                exit(1);
 185      -        }
 186  178  
 187  179          (void) enable_extended_FILE_stdio(-1, -1);
 188  180  
 189  181          /*
 190  182           * Read in the values from SMF first before we check
 191  183           * command line options so the options override SMF values.
 192  184           */
 193  185          bufsz = PATH_MAX;
 194  186          ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE,
 195  187              SCF_TYPE_INTEGER, NFSD, &bufsz);
 196  188          if (ret == SA_OK) {
 197  189                  errno = 0;
 198  190                  max_conns_allowed = strtol(value, (char **)NULL, 10);
 199  191                  if (errno != 0)
 200  192                          max_conns_allowed = -1;
 201  193          }
 202  194  
 203  195          bufsz = PATH_MAX;
 204  196          ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE,
 205  197              SCF_TYPE_INTEGER, NFSD, &bufsz);
 206  198          if (ret == SA_OK) {
 207  199                  errno = 0;
 208  200                  listen_backlog = strtol(value, (char **)NULL, 10);
 209  201                  if (errno != 0) {
 210  202                          listen_backlog = 32;
 211  203                  }
 212  204          }
 213  205  
 214  206          bufsz = PATH_MAX;
 215  207          ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE,
 216  208              SCF_TYPE_ASTRING, NFSD, &bufsz);
 217  209          if ((ret == SA_OK) && strlen(value) > 0) {
 218  210                  df_proto = strdup(value);
 219  211                  opt_cnt++;
                           /*
                            * Prefix match: any value starting with "ALL" (in any
                            * case) sets df_allflag instead of naming a protocol.
                            */
 220  212                  if (strncasecmp("ALL", value, 3) == 0) {
 221  213                          free(df_proto);
 222  214                          df_proto = NULL;
 223  215                          df_allflag = 1;
 224  216                  }
 225  217          }
 226  218  
 227  219          bufsz = PATH_MAX;
 228  220          ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE,
 229  221              SCF_TYPE_ASTRING, NFSD, &bufsz);
 230  222          if ((ret == SA_OK) && strlen(value) > 0) {
 231  223                  df_provider = strdup(value);
 232  224                  opt_cnt++;
 233  225          }
 234  226  
 235  227          bufsz = PATH_MAX;
 236  228          ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE,
 237  229              SCF_TYPE_INTEGER, NFSD, &bufsz);
 238  230          if (ret == SA_OK) {
 239  231                  errno = 0;
 240  232                  maxservers = strtol(value, (char **)NULL, 10);
 241  233                  if (errno != 0)
 242  234                          maxservers = 1024;
 243  235                  else
 244  236                          maxservers_set = 1;
 245  237          }
 246  238  
                   /*
                    * NOTE(review): the two version properties below offer only a
                    * 4-byte buffer (the others offer PATH_MAX) and do not check errno
                    * after strtol() -- presumably because a version is a single digit;
                    * confirm against nfs_smf_get_prop().  Out-of-range values are only
                    * caught by the range check later, and only on some paths.
                    */
 247  239          bufsz = 4;
 248  240          ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE,
 249  241              SCF_TYPE_INTEGER, NFSD, &bufsz);
 250  242          if (ret == SA_OK)
 251  243                  nfs_server_vers_min = strtol(value, (char **)NULL, 10);
 252  244  
 253  245          bufsz = 4;
 254  246          ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE,
 255  247              SCF_TYPE_INTEGER, NFSD, &bufsz);
 256  248          if (ret == SA_OK)
 257  249                  nfs_server_vers_max = strtol(value, (char **)NULL, 10);
 258  250  
 259  251          bufsz = PATH_MAX;
 260  252          ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE,
 261  253              SCF_TYPE_ASTRING, NFSD, &bufsz);
 262  254          if (ret == SA_OK)
 263  255                  if (strncasecmp(value, "off", 3) == 0)
 264  256                          nfs_server_delegation = FALSE;
 265  257  
 266  258          /*
 267  259           * Conflict options error messages.
 268  260           */
 269  261          if (opt_cnt > 1) {
 270  262                  (void) fprintf(stderr, "\nConflicting options, only one of "
 271  263                      "the following options can be specified\n"
 272  264                      "in SMF:\n"
 273  265                      "\tprotocol=ALL\n"
 274  266                      "\tprotocol=protocol\n"
 275  267                      "\tdevice=devicename\n\n");
 276  268                  usage();
 277  269          }
                   /* Reuse the counter for conflicts among command-line options. */
 278  270          opt_cnt = 0;
 279  271  
 280  272          while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
 281  273                  switch (i) {
 282  274                  case 'a':
 283  275                          free(df_proto);
 284  276                          df_proto = NULL;
 285  277                          free(df_provider);
 286  278                          df_provider = NULL;
 287  279  
 288  280                          allflag = 1;
 289  281                          opt_cnt++;
 290  282                          break;
 291  283  
 292  284                  case 'c':
 293  285                          max_conns_allowed = atoi(optarg);
 294  286                          break;
 295  287  
 296  288                  case 'p':
 297  289                          proto = optarg;
 298  290                          df_allflag = 0;
 299  291                          opt_cnt++;
 300  292                          break;
 301  293  
 302  294                  /*
 303  295                   * DSS: NFSv4 distributed stable storage.
 304  296                   *
 305  297                   * This is a Contracted Project Private interface, for
 306  298                   * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
 307  299                   */
 308  300                  case 's':
 309  301                          if (strlen(optarg) < MAXPATHLEN) {
 310  302                                  /* first "-s" option encountered? */
 311  303                                  if (dss_pathnames == NULL) {
 312  304                                          /*
 313  305                                           * Allocate maximum possible space
 314  306                                           * required given cmdline arg count;
 315  307                                           * "-s <path>" consumes two args.
 316  308                                           */
 317  309                                          size_t sz = (ac / 2) * sizeof (char *);
 318  310                                          dss_pathnames = (char **)malloc(sz);
 319  311                                          if (dss_pathnames == NULL) {
 320  312                                                  (void) fprintf(stderr, "%s: "
 321  313                                                      "dss paths malloc failed\n",
 322  314                                                      av[0]);
 323  315                                                  exit(1);
 324  316                                          }
 325  317                                          (void) memset(dss_pathnames, 0, sz);
 326  318                                  }
                                           /* Stores the argv pointer itself; no copy is made. */
 327  319                                  dss_pathnames[dss_npaths] = optarg;
 328  320                                  dss_npaths++;
 329  321                          } else {
                                           /* Over-long path is reported and skipped; startup continues. */
 330  322                                  (void) fprintf(stderr,
 331  323                                      "%s: -s pathname too long.\n", av[0]);
 332  324                          }
 333  325                          break;
 334  326  
 335  327                  case 't':
 336  328                          provider = optarg;
 337  329                          df_allflag = 0;
 338  330                          opt_cnt++;
 339  331                          break;
 340  332  
 341  333                  case 'l':
 342  334                          listen_backlog = atoi(optarg);
 343  335                          break;
 344  336  
 345  337                  case '?':
 346  338                          usage();
 347  339                          /* NOTREACHED */
 348  340                  }
 349  341          }
 350  342  
                   /*
                    * NOTE(review): this assignment discards the allflag = 1 set by
                    * the -a case above, so -a only takes effect when SMF also said
                    * protocol=ALL.  With -a alone, allflag, proto and provider all
                    * end up unset and the default-provider loop runs instead of
                    * do_all().  Looks unintended -- confirm before relying on -a.
                    */
 351  343          allflag = df_allflag;
 352  344          if (proto == NULL)
 353  345                  proto = df_proto;
 354  346          if (provider == NULL)
 355  347                  provider = df_provider;
 356  348  
 357  349          /*
 358  350           * Conflict options error messages.
 359  351           */
 360  352          if (opt_cnt > 1) {
 361  353                  (void) fprintf(stderr, "\nConflicting options, only one of "
 362  354                      "the following options can be specified\n"
 363  355                      "on the command line:\n"
 364  356                      "\t-a\n"
 365  357                      "\t-p protocol\n"
 366  358                      "\t-t transport\n\n");
 367  359                  usage();
 368  360          }
 369  361  
                   /*
                    * UDP with a maximum version of 4: fatal (exit status 3) when the
                    * minimum is also 4, otherwise only a warning is printed.
                    */
 370  362          if (proto != NULL &&
 371  363              strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
 372  364                  if (nfs_server_vers_max == NFS_V4) {
 373  365                          if (nfs_server_vers_min == NFS_V4) {
 374  366                                  fprintf(stderr,
 375  367                                      "NFS version 4 is not supported "
 376  368                                      "with the UDP protocol.  Exiting\n");
 377  369                                  exit(3);
 378  370                          } else {
 379  371                                  fprintf(stderr,
 380  372                                      "NFS version 4 is not supported "
 381  373                                      "with the UDP protocol.\n");
 382  374                          }
 383  375                  }
 384  376          }
 385  377  
                   /*
                    * NOTE(review): the four cases below form a single if/else-if
                    * chain, so the version range check and the logmaxservers default
                    * are evaluated only when there is no trailing argument.  When
                    * <nservers> is given on the command line the version range is
                    * not validated here.
                    */
 386  378          /*
 387  379           * If there is exactly one more argument, it is the number of
 388  380           * servers.
 389  381           */
 390  382          if (optind == ac - 1) {
 391  383                  maxservers = atoi(av[optind]);
 392  384                  maxservers_set = 1;
 393  385          }
 394  386          /*
 395  387           * If there are two or more arguments, then this is a usage error.
 396  388           */
 397  389          else if (optind < ac - 1)
 398  390                  usage();
 399  391          /*
 400  392           * Check the ranges for min/max version specified
 401  393           */
 402  394          else if ((nfs_server_vers_min > nfs_server_vers_max) ||
 403  395              (nfs_server_vers_min < NFS_VERSMIN) ||
 404  396              (nfs_server_vers_max > NFS_VERSMAX))
 405  397                  usage();
 406  398          /*
 407  399           * There are no additional arguments, and we haven't set maxservers
 408  400           * explicitly via the config file, we use a default number of
 409  401           * servers.  We will log this.
 410  402           */
 411  403          else if (maxservers_set == 0)
 412  404                  logmaxservers = 1;
 413  405  
 414  406          /*
 415  407           * Basic Sanity checks on options
 416  408           *
 417  409           * max_conns_allowed must be positive, except for the special
 418  410           * value of -1 which is used internally to mean unlimited, -1 isn't
 419  411           * documented but we allow it anyway.
 420  412           *
 421  413           * maxservers must be positive
 422  414           * listen_backlog must be positive or zero
 423  415           */
 424  416          if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
 425  417              (listen_backlog < 0) || (maxservers <= 0)) {
 426  418                  usage();
 427  419          }
 428  420  
 429  421          /*
 430  422           * Set current dir to server root
 431  423           */
 432  424          if (chdir(dir) < 0) {
 433  425                  (void) fprintf(stderr, "%s:  ", MyName);
 434  426                  perror(dir);
 435  427                  exit(1);
 436  428          }
 437  429  
                   /*
                    * In DEBUG builds daemonize_init() is skipped, pipe_fd keeps its
                    * initial value of -1, and that value is still handed to
                    * daemonize_fini() further down.
                    */
 438  430  #ifndef DEBUG
 439  431          pipe_fd = daemonize_init();
 440  432  #endif
 441  433  
 442  434          openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
 443  435  
 444  436          /*
 445  437           * establish our lock on the lock file and write our pid to it.
 446  438           * exit if some other process holds the lock, or if there's any
 447  439           * error in writing/locking the file.
 448  440           */
 449  441          pid = _enter_daemon_lock(NFSD);
 450  442          switch (pid) {
 451  443          case 0:
 452  444                  break;
 453  445          case -1:
 454  446                  fprintf(stderr, "error locking for %s: %s\n", NFSD,
 455  447                      strerror(errno));
 456  448                  exit(2);
 457  449          default:
 458  450                  /* daemon was already running */
 459  451                  exit(0);
 460  452          }
 461  453  
 462  454          /*
 463  455           * If we've been given a list of paths to be used for distributed
 464  456           * stable storage, and provided we're going to run a version
 465  457           * that supports it, setup the DSS paths.
 466  458           */
 467  459          if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
 468  460                  if (dss_init(dss_npaths, dss_pathnames) != 0) {
 469  461                          fprintf(stderr, "%s", "dss_init failed. Exiting.\n");
 470  462                          exit(1);
 471  463                  }
 472  464          }
 473  465  
 474  466          /*
 475  467           * Block all signals till we spawn other
 476  468           * threads.
 477  469           */
 478  470          (void) sigfillset(&sgset);
 479  471          (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
 480  472  
 481  473          if (logmaxservers) {
 482  474                  fprintf(stderr,
 483  475                      "Number of servers not specified. Using default of %d.\n",
 484  476                      maxservers);
 485  477          }
 486  478  
 487  479          /*
 488  480           * Make sure to unregister any previous versions in case the
 489  481           * user is reconfiguring the server in interesting ways.
 490  482           */
 491  483          svc_unreg(NFS_PROGRAM, NFS_VERSION);
 492  484          svc_unreg(NFS_PROGRAM, NFS_V3);
 493  485          svc_unreg(NFS_PROGRAM, NFS_V4);
 494  486          svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
 495  487          svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
 496  488  
 497  489          /*
 498  490           * Set up kernel RPC thread pool for the NFS server.
 499  491           */
 500  492          if (nfssvcpool(maxservers)) {
 501  493                  fprintf(stderr, "Can't set up kernel NFS service: %s. "
 502  494                      "Exiting.\n", strerror(errno));
 503  495                  exit(1);
 504  496          }
 505  497  
 506  498          /*
 507  499           * Set up blocked thread to do LWP creation on behalf of the kernel.
 508  500           */
 509  501          if (svcwait(NFS_SVCPOOL_ID)) {
 510  502                  fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n",
 511  503                      strerror(errno));
 512  504                  exit(1);
 513  505          }
 514  506  
 515  507          /*
 516  508           * RDMA start and stop thread.
 517  509           * Per pool RDMA listener creation and
 518  510           * destructor thread.
 519  511           *
 520  512           * start rdma services and block in the kernel.
 521  513           * (only if proto or provider is not set to TCP or UDP)
                    *
                    * Unlike the two setup steps above, a failure here is only
                    * reported; startup continues.
 522  514           */
 523  515          if ((proto == NULL) && (provider == NULL)) {
 524  516                  if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
 525  517                      nfs_server_vers_max, nfs_server_delegation)) {
 526  518                          fprintf(stderr,
 527  519                              "Can't set up RDMA creator thread : %s\n",
 528  520                              strerror(errno));
 529  521                  }
 530  522          }
 531  523  
 532  524          /*
 533  525           * Now open up for signal delivery
 534  526           */
 535  527  
 536  528          (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
 537  529          sigset(SIGTERM, sigflush);
 538  530          sigset(SIGUSR1, quiesce);
 539  531  
 540  532          /*
 541  533           * Build a protocol block list for registration.
                    *
                    * The list has two nodes: protobp0 (NFS) followed by its ->next
                    * (NFS_ACL).  After this section protobp points at the second node.
                    *
                    * NOTE(review): neither malloc() result is checked before it is
                    * dereferenced.
 542  534           */
 543  535          protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
 544  536          protobp->serv = "NFS";
 545  537          protobp->versmin = nfs_server_vers_min;
 546  538          protobp->versmax = nfs_server_vers_max;
 547  539          protobp->program = NFS_PROGRAM;
 548  540  
 549  541          protobp->next = (struct protob *)malloc(sizeof (struct protob));
 550  542          protobp = protobp->next;
 551  543          protobp->serv = "NFS_ACL";              /* not used */
 552  544          protobp->versmin = nfs_server_vers_min;
 553  545          /* XXX - this needs work to get the version just right */
 554  546          protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
 555  547              NFS_ACL_V3 : nfs_server_vers_max;
 556  548          protobp->program = NFS_ACL_PROGRAM;
 557  549          protobp->next = (struct protob *)NULL;
 558  550  
                   /*
                    * Register on transports, in priority order: all transports,
                    * every netconfig entry matching proto, the single named provider,
                    * or the built-in defaultproviders list.
                    */
 559  551          if (allflag) {
 560  552                  if (do_all(protobp0, nfssvc) == -1) {
 561  553                          fprintf(stderr, "setnetconfig failed : %s\n",
 562  554                              strerror(errno));
 563  555                          exit(1);
 564  556                  }
 565  557          } else if (proto) {
 566  558                  /* there's more than one match for the same protocol */
 567  559                  struct netconfig *nconf;
 568  560                  NCONF_HANDLE *nc;
 569  561                  bool_t  protoFound = FALSE;
 570  562                  if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
 571  563                          fprintf(stderr, "setnetconfig failed : %s\n",
 572  564                              strerror(errno));
 573  565                          goto done;
 574  566                  }
                           /* Assignment in the condition is intentional: loop until NULL. */
 575  567                  while (nconf = getnetconfig(nc)) {
 576  568                          if (strcmp(nconf->nc_proto, proto) == 0) {
 577  569                                  protoFound = TRUE;
 578  570                                  do_one(nconf->nc_device, NULL,
 579  571                                      protobp0, nfssvc);
 580  572                          }
 581  573                  }
 582  574                  (void) endnetconfig(nc);
 583  575                  if (protoFound == FALSE) {
 584  576                          fprintf(stderr,
 585  577                              "couldn't find netconfig entry for protocol %s\n",
 586  578                              proto);
 587  579                  }
 588  580          } else if (provider)
 589  581                  do_one(provider, proto, protobp0, nfssvc);
 590  582          else {
 591  583                  for (providerp = defaultproviders;
 592  584                      *providerp != NULL; providerp++) {
 593  585                          provider = *providerp;
 594  586                          do_one(provider, NULL, protobp0, nfssvc);
 595  587                  }
 596  588          }
 597  589  done:
 598  590  
                   /*
                    * protobp still points at the second (NFS_ACL) node and protobp0
                    * at the first, so these two calls release the whole list.
                    */
 599  591          free(protobp);
 600  592          free(protobp0);
 601  593  
 602  594          if (num_fds == 0) {
 603  595                  fprintf(stderr, "Could not start NFS service for any protocol."
 604  596                      " Exiting.\n");
 605  597                  exit(1);
 606  598          }
 607  599  
 608  600          end_listen_fds = num_fds;
 609  601  
 610  602          /*
 611  603           * nfsd is up and running as far as we are concerned.
 612  604           */
 613  605          daemonize_fini(pipe_fd);
 614  606  
 615  607          /*
 616  608           * Get rid of unneeded privileges.
 617  609           */
 618  610          __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
 619  611              PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
 620  612  
 621  613          /*
 622  614           * Poll for non-data control events on the transport descriptors.
 623  615           */
 624  616          poll_for_action();
 625  617  
 626  618          /*
 627  619           * If we get here, something failed in poll_for_action().
 628  620           */
 629  621          return (1);
 630  622  }
 631  623  
           /*
            * Ask the kernel to create the NFS service thread pool.
            *
            * Builds a svcpool_args with id NFS_SVCPOOL_ID and maxservers as the
            * thread limit; every other field is passed as zero (presumably meaning
            * "use the kernel default" -- confirm against the SVCPOOL_CREATE
            * implementation).
            *
            * Returns the value of _nfssys(SVCPOOL_CREATE, ...); main() treats any
            * non-zero result as failure.
            */
 632  624  static int
 633  625  nfssvcpool(int maxservers)
 634  626  {
 635  627          struct svcpool_args npa;
 636  628  
 637  629          npa.id = NFS_SVCPOOL_ID;
 638  630          npa.maxthreads = maxservers;
 639  631          npa.redline = 0;
 640  632          npa.qsize = 0;
 641  633          npa.timeout = 0;
 642  634          npa.stksize = 0;
 643  635          npa.max_same_xprt = 0;
 644  636          return (_nfssys(SVCPOOL_CREATE, &npa));
 645  637  }
 646  638  
 647  639  /*
 648  640   * Establish NFS service thread.
            *
            * Hands the transport descriptor fd, its address mask and the netid from
            * nconf to the kernel through _nfssys(NFS_SVC), together with the
            * version range and delegation setting held in the file-scope globals.
            *
            * For a transport whose nc_proto begins with NC_UDP (case-insensitive)
            * the maximum version is capped at NFS_V3.  If that leaves an empty
            * range, 0 is returned without calling the kernel.
            *
            * Otherwise returns the result of _nfssys(NFS_SVC, ...).
 649  641   */
 650  642  static int
 651  643  nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
 652  644  {
 653  645          struct nfs_svc_args nsa;
 654  646  
 655  647          nsa.fd = fd;
 656  648          nsa.netid = nconf->nc_netid;
 657  649          nsa.addrmask = addrmask;
 658  650          if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
 659  651                  nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
 660  652                      NFS_V3 : nfs_server_vers_max;
 661  653                  nsa.versmin = nfs_server_vers_min;
 662  654                  /*
 663  655                   * If no version left, silently do nothing, previous
 664  656                   * checks will have assured at least TCP is available.
 665  657                   */
 666  658                  if (nsa.versmin > nsa.versmax)
 667  659                          return (0);
 668  660          } else {
 669  661                  nsa.versmax = nfs_server_vers_max;
 670  662                  nsa.versmin = nfs_server_vers_min;
 671  663          }
 672  664          nsa.delegation = nfs_server_delegation;
 673  665          return (_nfssys(NFS_SVC, &nsa));
 674  666  }
 675  667  
           /*
            * Print the command-line synopsis to stderr, prefixed with MyName, and
            * terminate the process with exit status 1.  Does not return.
            *
            * NOTE(review): the synopsis does not mention the -s option that main()
            * accepts; main()'s comment describes it as a private interface, so the
            * omission may be deliberate.
            */
 676  668  static void
 677  669  usage(void)
 678  670  {
 679  671          (void) fprintf(stderr,
 680  672  "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
 681  673          (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
 682  674          (void) fprintf(stderr,
 683  675  "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
 684  676          (void) fprintf(stderr,
 685  677  "\tmax_conns is the maximum number of concurrent connections allowed,\n");
 686  678          (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
 687  679          (void) fprintf(stderr, "> zero,\n");
 688  680          (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
 689  681          (void) fprintf(stderr,
 690  682              "\ttransport is a transport provider name (i.e. device),\n");
 691  683          (void) fprintf(stderr,
 692  684              "\tlisten_backlog is the TCP listen backlog,\n");
 693  685          (void) fprintf(stderr,
 694  686              "\tand <nservers> must be a decimal number > zero.\n");
 695  687          exit(1);
 696  688  }
 697  689  
 698  690  /*
 699  691   * Issue nfssys system call to flush all logging buffers asynchronously.
 700  692   *
 701  693   * NOTICE: It is extremely important to flush NFS logging buffers when
 702  694   *         nfsd exits. When the system is halted or rebooted nfslogd
 703  695   *         may not have an opportunity to flush the buffers.
 704  696   */
 705  697  static void
 706  698  nfsl_flush()
 707  699  {
 708  700          struct nfsl_flush_args nfa;
 709  701  
 710  702          memset((void *)&nfa, 0, sizeof (nfa));
 711  703          nfa.version = NFSL_FLUSH_ARGS_VERS;
 712  704          nfa.directive = NFSL_ALL;       /* flush all asynchronously */
 713  705  
 714  706          if (_nfssys(LOG_FLUSH, &nfa) < 0)
 715  707                  syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
 716  708                      strerror(errno));
 717  709  }
 718  710  
 719  711  /*
 720  712   * SIGTERM handler.
 721  713   * Flush logging buffers and exit.
 722  714   */
 723  715  static void
 724  716  sigflush(int sig)
 725  717  {
 726  718          nfsl_flush();
 727  719          _exit(0);
 728  720  }
 729  721  
 730  722  /*
 731  723   * SIGUSR1 handler.
 732  724   *
 733  725   * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
 734  726   *
 735  727   * This is a Contracted Project Private interface, for the sole use
 736  728   * of Sun Cluster HA-NFS. See PSARC/2004/497.
 737  729   *
 738  730   * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
 739  731   */
 740  732  static void
 741  733  quiesce(int sig)
 742  734  {
 743  735          int error;
 744  736          int id = NFS_SVCPOOL_ID;
 745  737  
 746  738          if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
 747  739                  /* Request server quiesce at next shutdown */
 748  740                  error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
 749  741  
 750  742                  /*
 751  743                   * ENOENT is returned if there is no matching SVC pool
 752  744                   * for the id. Possibly because the pool is not yet setup.
 753  745                   * In this case, just exit as if no error. For all other errors,
 754  746                   * just return and allow caller to retry.
 755  747                   */
 756  748                  if (error && errno != ENOENT) {
 757  749                          syslog(LOG_ERR,
 758  750                              "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
 759  751                              strerror(errno));
 760  752                          return;
 761  753                  }
 762  754          }
 763  755  
 764  756          /* Flush logging buffers */
 765  757          nfsl_flush();
 766  758  
 767  759          _exit(0);
 768  760  }
 769  761  
 770  762  /*
 771  763   * DSS: distributed stable storage.
 772  764   * Create leaf directories as required, keeping an eye on path
 773  765   * lengths. Calls exit(1) on failure.
 774  766   * The pathnames passed in must already exist, and must be writeable by nfsd.
 775  767   * Note: the leaf directories under NFS4_VAR_DIR are not created here;
 776  768   * they're created at pkg install.
 777  769   */
 778  770  static void
 779  771  dss_mkleafdirs(uint_t npaths, char **pathnames)
 780  772  {
 781  773          int i;
 782  774          char *tmppath = NULL;
 783  775  
 784  776          /*
 785  777           * Create the temporary storage used by dss_mkleafdir() here,
 786  778           * rather than in that function, so that it only needs to be
 787  779           * done once, rather than once for each call. Too big to put
 788  780           * on the function's stack.
 789  781           */
 790  782          tmppath = (char *)malloc(MAXPATHLEN);
 791  783          if (tmppath == NULL) {
 792  784                  syslog(LOG_ERR, "tmppath malloc failed. Exiting");
 793  785                  exit(1);
 794  786          }
 795  787  
 796  788          for (i = 0; i < npaths; i++) {
 797  789                  char *p = pathnames[i];
 798  790  
 799  791                  dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
 800  792                  dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
 801  793          }
 802  794  
 803  795          free(tmppath);
 804  796  }
 805  797  
 806  798  /*
 807  799   * Create "leaf" in "dir" (which must already exist).
 808  800   * leaf: should start with a '/'
 809  801   */
 810  802  static void
 811  803  dss_mkleafdir(char *dir, char *leaf, char *tmppath)
 812  804  {
 813  805          /* MAXPATHLEN includes the terminating NUL */
 814  806          if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
 815  807                  fprintf(stderr, "stable storage path too long: %s%s. "
 816  808                      "Exiting.\n", dir, leaf);
 817  809                  exit(1);
 818  810          }
 819  811  
 820  812          (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
 821  813  
 822  814          /* the directory may already exist: that's OK */
 823  815          if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
 824  816                  fprintf(stderr, "error creating stable storage directory: "
 825  817                      "%s: %s. Exiting.\n", strerror(errno), tmppath);
 826  818                  exit(1);
 827  819          }
 828  820  }
 829  821  
 830  822  /*
 831  823   * Create the storage dirs, and pass the path list to the kernel.
 832  824   * This requires the nfssrv module to be loaded; the _nfssys() syscall
 833  825   * will fail ENOTSUP if it is not.
 834  826   * Use libnvpair(3LIB) to pass the data to the kernel.
 835  827   */
 836  828  static int
 837  829  dss_init(uint_t npaths, char **pathnames)
 838  830  {
 839  831          int i, j, nskipped, error;
 840  832          char *bufp;
 841  833          uint32_t bufsize;
 842  834          size_t buflen;
 843  835          nvlist_t *nvl;
 844  836  
 845  837          if (npaths > 1) {
 846  838                  /*
 847  839                   * We need to remove duplicate paths; this might be user error
 848  840                   * in the general case, but HA-NFSv4 can also cause this.
 849  841                   * Sort the pathnames array, and NULL out duplicates,
 850  842                   * then write the non-NULL entries to a new array.
 851  843                   * Sorting will also allow the kernel to optimise its searches.
 852  844                   */
 853  845  
 854  846                  qsort(pathnames, npaths, sizeof (char *), qstrcmp);
 855  847  
 856  848                  /* now NULL out any duplicates */
 857  849                  i = 0; j = 1; nskipped = 0;
 858  850                  while (j < npaths) {
 859  851                          if (strcmp(pathnames[i], pathnames[j]) == 0) {
 860  852                                  pathnames[j] = NULL;
 861  853                                  j++;
 862  854                                  nskipped++;
 863  855                                  continue;
 864  856                          }
 865  857  
 866  858                          /* skip i over any of its NULLed duplicates */
 867  859                          i = j++;
 868  860                  }
 869  861  
 870  862                  /* finally, write the non-NULL entries to a new array */
 871  863                  if (nskipped > 0) {
 872  864                          int nreal;
 873  865                          size_t sz;
 874  866                          char **tmp_pathnames;
 875  867  
 876  868                          nreal = npaths - nskipped;
 877  869  
 878  870                          sz = nreal * sizeof (char *);
 879  871                          tmp_pathnames = (char **)malloc(sz);
 880  872                          if (tmp_pathnames == NULL) {
 881  873                                  fprintf(stderr, "tmp_pathnames malloc "
 882  874                                      "failed\n");
 883  875                                  exit(1);
 884  876                          }
 885  877  
 886  878                          for (i = 0, j = 0; i < npaths; i++)
 887  879                                  if (pathnames[i] != NULL)
 888  880                                          tmp_pathnames[j++] = pathnames[i];
 889  881                          free(pathnames);
 890  882                          pathnames = tmp_pathnames;
 891  883                          npaths = nreal;
 892  884                  }
 893  885  
 894  886          }
 895  887  
 896  888          /* Create directories to store the distributed state files */
 897  889          dss_mkleafdirs(npaths, pathnames);
 898  890  
 899  891          /* Create the name-value pair list */
 900  892          error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
 901  893          if (error) {
 902  894                  fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno));
 903  895                  return (1);
 904  896          }
 905  897  
 906  898          /* Add the pathnames array as a single name-value pair */
 907  899          error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
 908  900              pathnames, npaths);
 909  901          if (error) {
 910  902                  fprintf(stderr, "nvlist_add_string_array failed: %s\n",
 911  903                      strerror(errno));
 912  904                  nvlist_free(nvl);
 913  905                  return (1);
 914  906          }
 915  907  
 916  908          /*
 917  909           * Pack list into contiguous memory, for passing to kernel.
 918  910           * nvlist_pack() will allocate the memory for the buffer,
 919  911           * which we should free() when no longer needed.
 920  912           * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
 921  913           */
 922  914          bufp = NULL;
 923  915          error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
 924  916          if (error) {
 925  917                  fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno));
 926  918                  nvlist_free(nvl);
 927  919                  return (1);
 928  920          }
 929  921  
 930  922          /* Now we have the packed buffer, we no longer need the list */
 931  923          nvlist_free(nvl);
 932  924  
 933  925          /*
 934  926           * Let the kernel know in advance how big the buffer is.
 935  927           * NOTE: we cannot just pass buflen, since size_t is a long, and
 936  928           * thus a different size between ILP32 userland and LP64 kernel.
 937  929           * Use an int for the transfer, since that should be big enough;
 938  930           * this is a no-op at the moment, here, since nfsd is 32-bit, but
 939  931           * that could change.
 940  932           */
 941  933          bufsize = (uint32_t)buflen;
 942  934          error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
 943  935          if (error) {
 944  936                  fprintf(stderr,
 945  937                      "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n",
 946  938                      strerror(errno));
 947  939                  free(bufp);
 948  940                  return (1);
 949  941          }
 950  942  
 951  943          /* Pass the packed buffer to the kernel */
 952  944          error = _nfssys(NFS4_DSS_SETPATHS, bufp);
 953  945          if (error) {
 954  946                  fprintf(stderr,
 955  947                      "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno));
 956  948                  free(bufp);
 957  949                  return (1);
 958  950          }
 959  951  
 960  952          /*
 961  953           * The kernel has now unpacked the buffer and extracted the
 962  954           * pathnames array, we no longer need the buffer.
 963  955           */
 964  956          free(bufp);
 965  957  
 966  958          return (0);
 967  959  }
 968  960  
 969  961  /*
 970  962   * Quick sort string compare routine, for qsort.
 971  963   * Needed to make arg types correct.
 972  964   */
 973  965  int
 974  966  qstrcmp(const void *p1, const void *p2)
 975  967  {
 976  968          char *s1 = *((char **)p1);
 977  969          char *s2 = *((char **)p2);
 978  970  
 979  971          return (strcmp(s1, s2));
 980  972  }
  
    | 
      ↓ open down ↓ | 
    785 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX