1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2015, Joyent Inc.
  25  */
  26 
  27 #include <assert.h>
  28 #include <ctype.h>
  29 #include <errno.h>
  30 #include <fcntl.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <strings.h>
  35 #include <syslog.h>
  36 #include <zone.h>
  37 #include <sys/types.h>
  38 #include <sys/stat.h>
  39 #include <stropts.h>
  40 #include <sys/conf.h>
  41 #include <pthread.h>
  42 #include <unistd.h>
  43 #include <wait.h>
  44 #include <libcontract.h>
  45 #include <libcontract_priv.h>
  46 #include <sys/contract/process.h>
  47 #include <sys/vnic.h>
  48 #include <zone.h>
  49 #include "dlmgmt_impl.h"
  50 
/*
 * Kind of operation a db request performs on a datalink configuration file.
 * dlmgmt_process_db_req() treats everything other than DLMGMT_DB_OP_READ as
 * a write operation.
 */
typedef enum dlmgmt_db_op {
        DLMGMT_DB_OP_WRITE,
        DLMGMT_DB_OP_DELETE,
        DLMGMT_DB_OP_READ
} dlmgmt_db_op_t;
  56 
/*
 * A single request against a datalink configuration file.  Requests that
 * cannot be completed right away are chained through ls_next on the pending
 * list.
 */
typedef struct dlmgmt_db_req_s {
        struct dlmgmt_db_req_s  *ls_next;       /* next pending request */
        dlmgmt_db_op_t          ls_op;          /* read, write or delete */
        char                    ls_link[MAXLINKNAMELEN]; /* entry name */
        datalink_id_t           ls_linkid;
        zoneid_t                ls_zoneid;      /* zone whose file is used */
        uint32_t                ls_flags;       /* Either DLMGMT_ACTIVE or   */
                                                /* DLMGMT_PERSIST, not both. */
} dlmgmt_db_req_t;
  66 
/*
 * Singly-linked list of pending db updates (e.g., because of a read-only
 * filesystem).  New requests are appended at the tail and
 * dlmgmt_db_update_thread() consumes them from the head.  Both pointers are
 * NULL when nothing is pending.
 */
static dlmgmt_db_req_t  *dlmgmt_db_req_head = NULL;
static dlmgmt_db_req_t  *dlmgmt_db_req_tail = NULL;
  72 
/*
 * rewrite_needed is set to B_TRUE by process_link_line() if it encounters a
 * line with an old format.  This will cause the file being read to be
 * re-written with the current format.
 */
static boolean_t        rewrite_needed;

/* Forward declarations for the static helpers defined later in this file. */
static int              dlmgmt_db_update(dlmgmt_db_op_t, const char *,
                            dlmgmt_link_t *, uint32_t);
static int              dlmgmt_process_db_req(dlmgmt_db_req_t *);
static int              dlmgmt_process_db_onereq(dlmgmt_db_req_t *, boolean_t);
static void             *dlmgmt_db_update_thread(void *);
static boolean_t        process_link_line(char *, dlmgmt_link_t *);
static int              process_db_write(dlmgmt_db_req_t *, FILE *, FILE *);
static int              process_db_read(dlmgmt_db_req_t *, FILE *);
static void             generate_link_line(dlmgmt_link_t *, boolean_t, char *);

/*
 * Number of bytes left between the write cursor "ptr" and the end-of-buffer
 * limit "lim"; evaluates to 0 once ptr has reached or passed lim.
 */
#define BUFLEN(lim, ptr)        (((lim) > (ptr)) ? ((lim) - (ptr)) : 0)
/* NOTE(review): presumably the maximum length of one db file line; confirm. */
#define MAXLINELEN              1024

/* Callback type that is handed a single link. */
typedef void db_walk_func_t(dlmgmt_link_t *);
  94 
/*
 * Translator functions to go from dladm_datatype_t to character strings.
 * Each function takes a pointer to a buffer, the size of the buffer,
 * the name of the attribute, and a pointer to the value to be written.  The
 * property is emitted in the form "<name>=<type>,<value>;".  The functions
 * return the number of bytes written to the buffer (not counting the
 * terminating NUL).  If the buffer is not big enough to hold the string
 * representing the value, then nothing is written and 0 is returned.
 */
typedef size_t write_func_t(char *, size_t, char *, void *);
 104 
/*
 * Translator functions to read from a NUL-terminated string buffer into
 * something of the given DLADM_TYPE_*.  On success each function stores a
 * newly allocated value through its second argument and returns the size in
 * bytes of that value: the string length plus its terminator for strings,
 * and the size of the stored object for booleans and integers.  If there is
 * an error reading data from the buffer, then 0 is returned.  It is the
 * caller's responsibility to free the data allocated by these functions.
 */
typedef size_t read_func_t(char *, void **);
 113 
/* One entry of the translator table: how to write and read a given type. */
typedef struct translator_s {
        const char      *type_name;     /* type tag emitted in the db file */
        write_func_t    *write_func;    /* value -> "<name>=<type>,<val>;" */
        read_func_t     *read_func;     /* string -> newly allocated value */
} translator_t;
 119 
/*
 * Translator functions, defined later but declared here so that
 * the translator table can be defined.
 */
static write_func_t     write_str, write_boolean, write_uint64;
static read_func_t      read_str, read_boolean, read_int64;

/*
 * Translator table, indexed by dladm_datatype_t; the order of the entries
 * must therefore follow the order of that enumeration.  Note that the "int"
 * entry writes through write_uint64() but reads back through read_int64().
 */
static translator_t translators[] = {
        { "string",     write_str,      read_str        },
        { "boolean",    write_boolean,  read_boolean    },
        { "int",        write_uint64,   read_int64      }
};

/* Number of entries in the translators[] table. */
static size_t ntranslators = sizeof (translators) / sizeof (translator_t);
 137 
/* Terminates one "<name>=<type>,<value>" property. */
#define LINK_PROPERTY_DELIMINATOR       ";"
/* Separates the type tag from the value inside a property. */
#define LINK_PROPERTY_TYPE_VALUE_SEP    ","
/*
 * Length of the fixed part of a property of type "t" named "n": the type
 * tag, the type/value separator, the property delimiter and the name.  Note
 * that the '=' emitted by GENERATE_PROPERTY_STRING() is not included, nor is
 * the value or the terminating NUL.
 */
#define BASE_PROPERTY_LENGTH(t, n) (strlen(translators[(t)].type_name) +\
                                    strlen(LINK_PROPERTY_TYPE_VALUE_SEP) +\
                                    strlen(LINK_PROPERTY_DELIMINATOR) +\
                                    strlen((n)))
/*
 * Format "<name>=<type>,<val>;" into buf using snprintf(); "conv" is the
 * printf conversion used for val.  Evaluates to snprintf()'s return value.
 */
#define GENERATE_PROPERTY_STRING(buf, length, conv, name, type, val) \
            (snprintf((buf), (length), "%s=%s%s" conv "%s", (name), \
            translators[(type)].type_name, \
            LINK_PROPERTY_TYPE_VALUE_SEP, (val), LINK_PROPERTY_DELIMINATOR))
 148 
/*
 * Name of the cache file to keep the active <link name, linkid> mapping
 */
char    cachefile[MAXPATHLEN];

/* Path of the persistent datalink configuration file. */
#define DLMGMT_PERSISTENT_DB_PATH       "/etc/dladm/datalink.conf"
/*
 * Copy the path of the persistent db file (if "persistent" is true) or of
 * the active cache file (otherwise) into "buffer", which must hold
 * MAXPATHLEN bytes.  The expansion already ends with a semicolon.
 */
#define DLMGMT_MAKE_FILE_DB_PATH(buffer, persistent)    \
        (void) snprintf((buffer), MAXPATHLEN, "%s", \
        (persistent) ? DLMGMT_PERSISTENT_DB_PATH : cachefile);
 158 
/* Arguments and result of the zone-relative open callback. */
typedef struct zopen_arg {
        const char      *zopen_modestr; /* fopen(3C)-style mode string */
        int             *zopen_pipe;    /* pipe used to pass the fd back */
        int             zopen_fd;       /* out: descriptor of opened file */
} zopen_arg_t;
 164 
/* Argument of the zone-relative rename callback. */
typedef struct zrename_arg {
        const char      *zrename_newname;       /* destination name */
} zrename_arg_t;
 168 
/* Operation-specific argument handed to a zone file callback. */
typedef union zfoparg {
        zopen_arg_t     zfop_openarg;   /* used by the open callback */
        zrename_arg_t   zfop_renamearg; /* used by the rename callback */
} zfoparg_t;
 173 
/*
 * Argument passed to every zone file callback.
 * NOTE(review): zfarg_finglobalzone is declared zoneid_t but dlmgmt_zfop()
 * assigns it the result of a boolean expression.
 */
typedef struct zfcbarg {
        boolean_t       zfarg_inglobalzone; /* is callback in global zone? */
        zoneid_t        zfarg_finglobalzone; /* is file in global zone? */
        const char      *zfarg_filename;        /* file to operate on */
        zfoparg_t       *zfarg_oparg;           /* operation-specific data */
} zfarg_t;
/* Shorthands to reach the operation-specific members through a zfarg_t. */
#define zfarg_openarg   zfarg_oparg->zfop_openarg
#define zfarg_renamearg zfarg_oparg->zfop_renamearg
 182 
/* Zone file callback; returns 0 on success or an errno-style value. */
typedef int zfcb_t(zfarg_t *);
 185 
 186 /*
 187  * Execute an operation on filename relative to zoneid's zone root.  If the
 188  * file is in the global zone, then the zfcb() callback will simply be called
 189  * directly.  If the file is in a non-global zone, then zfcb() will be called
 190  * both from the global zone's context, and from the non-global zone's context
 191  * (from a fork()'ed child that has entered the non-global zone).  This is
 192  * done to allow the callback to communicate with itself if needed (e.g. to
 193  * pass back the file descriptor of an opened file).
 194  */
 195 static int
 196 dlmgmt_zfop(const char *filename, zoneid_t zoneid, zfcb_t *zfcb,
 197     zfoparg_t *zfoparg)
 198 {
 199         int             ctfd;
 200         int             err;
 201         pid_t           childpid;
 202         siginfo_t       info;
 203         zfarg_t         zfarg;
 204         ctid_t          ct;
 205 
 206         if (zoneid != GLOBAL_ZONEID) {
 207                 /*
 208                  * We need to access a file that isn't in the global zone.
 209                  * Accessing non-global zone files from the global zone is
 210                  * unsafe (due to symlink attacks), we'll need to fork a child
 211                  * that enters the zone in question and executes the callback
 212                  * that will operate on the file.
 213                  *
 214                  * Before we proceed with this zone tango, we need to create a
 215                  * new process contract for the child, as required by
 216                  * zone_enter().
 217                  */
 218                 errno = 0;
 219                 ctfd = open64("/system/contract/process/template", O_RDWR);
 220                 if (ctfd == -1)
 221                         return (errno);
 222                 if ((err = ct_tmpl_set_critical(ctfd, 0)) != 0 ||
 223                     (err = ct_tmpl_set_informative(ctfd, 0)) != 0 ||
 224                     (err = ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR)) != 0 ||
 225                     (err = ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY)) != 0 ||
 226                     (err = ct_tmpl_activate(ctfd)) != 0) {
 227                         (void) close(ctfd);
 228                         return (err);
 229                 }
 230                 childpid = fork();
 231                 switch (childpid) {
 232                 case -1:
 233                         (void) ct_tmpl_clear(ctfd);
 234                         (void) close(ctfd);
 235                         return (err);
 236                 case 0:
 237                         (void) ct_tmpl_clear(ctfd);
 238                         (void) close(ctfd);
 239                         /*
 240                          * Elevate our privileges as zone_enter() requires all
 241                          * privileges.
 242                          */
 243                         if ((err = dlmgmt_elevate_privileges()) != 0)
 244                                 _exit(err);
 245                         if (zone_enter(zoneid) == -1)
 246                                 _exit(errno);
 247                         if ((err = dlmgmt_drop_privileges()) != 0)
 248                                 _exit(err);
 249                         break;
 250                 default:
 251                         if (contract_latest(&ct) == -1)
 252                                 ct = -1;
 253                         (void) ct_tmpl_clear(ctfd);
 254                         (void) close(ctfd);
 255                         if (waitid(P_PID, childpid, &info, WEXITED) == -1) {
 256                                 (void) contract_abandon_id(ct);
 257                                 return (errno);
 258                         }
 259                         (void) contract_abandon_id(ct);
 260                         if (info.si_status != 0)
 261                                 return (info.si_status);
 262                 }
 263         }
 264 
 265         zfarg.zfarg_inglobalzone = (zoneid == GLOBAL_ZONEID || childpid != 0);
 266         zfarg.zfarg_finglobalzone = (zoneid == GLOBAL_ZONEID);
 267         zfarg.zfarg_filename = filename;
 268         zfarg.zfarg_oparg = zfoparg;
 269         err = zfcb(&zfarg);
 270         if (!zfarg.zfarg_inglobalzone)
 271                 _exit(err);
 272         return (err);
 273 }
 274 
 275 static int
 276 dlmgmt_zopen_cb(zfarg_t *zfarg)
 277 {
 278         struct strrecvfd recvfd;
 279         boolean_t       newfile = B_FALSE;
 280         boolean_t       inglobalzone = zfarg->zfarg_inglobalzone;
 281         zoneid_t        finglobalzone = zfarg->zfarg_finglobalzone;
 282         const char      *filename = zfarg->zfarg_filename;
 283         const char      *modestr = zfarg->zfarg_openarg.zopen_modestr;
 284         int             *p = zfarg->zfarg_openarg.zopen_pipe;
 285         struct stat     statbuf;
 286         int             oflags;
 287         mode_t          mode;
 288         int             fd = -1;
 289         int             err;
 290 
 291         /* We only ever open a file for reading or writing, not both. */
 292         oflags = (modestr[0] == 'r') ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC;
 293         mode = (modestr[0] == 'r') ? 0 : S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 294 
 295         /* Open the file if we're in the same zone as the file. */
 296         if (inglobalzone == finglobalzone) {
 297                 /*
 298                  * First determine if we will be creating the file as part of
 299                  * opening it.  If so, then we'll need to ensure that it has
 300                  * the proper ownership after having opened it.
 301                  */
 302                 if (oflags & O_CREAT) {
 303                         if (stat(filename, &statbuf) == -1) {
 304                                 if (errno == ENOENT)
 305                                         newfile = B_TRUE;
 306                                 else
 307                                         return (errno);
 308                         }
 309                 }
 310                 if ((fd = open(filename, oflags, mode)) == -1)
 311                         return (errno);
 312                 if (newfile) {
 313                         if (chown(filename, UID_DLADM, GID_NETADM) == -1) {
 314                                 err = errno;
 315                                 (void) close(fd);
 316                                 return (err);
 317                         }
 318                 }
 319         }
 320 
 321         /*
 322          * If we're not in the global zone, send the file-descriptor back to
 323          * our parent in the global zone.
 324          */
 325         if (!inglobalzone) {
 326                 assert(!finglobalzone);
 327                 assert(fd != -1);
 328                 return (ioctl(p[1], I_SENDFD, fd) == -1 ? errno : 0);
 329         }
 330 
 331         /*
 332          * At this point, we know we're in the global zone.  If the file was
 333          * in a non-global zone, receive the file-descriptor from our child in
 334          * the non-global zone.
 335          */
 336         if (!finglobalzone) {
 337                 if (ioctl(p[0], I_RECVFD, &recvfd) == -1)
 338                         return (errno);
 339                 fd = recvfd.fd;
 340         }
 341 
 342         zfarg->zfarg_openarg.zopen_fd = fd;
 343         return (0);
 344 }
 345 
 346 static int
 347 dlmgmt_zunlink_cb(zfarg_t *zfarg)
 348 {
 349         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 350                 return (0);
 351         return (unlink(zfarg->zfarg_filename) == 0 ? 0 : errno);
 352 }
 353 
 354 static int
 355 dlmgmt_zrename_cb(zfarg_t *zfarg)
 356 {
 357         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 358                 return (0);
 359         return (rename(zfarg->zfarg_filename,
 360             zfarg->zfarg_renamearg.zrename_newname) == 0 ? 0 : errno);
 361 }
 362 
 363 /*
 364  * Same as fopen(3C), except that it opens the file relative to zoneid's zone
 365  * root.
 366  */
 367 static FILE *
 368 dlmgmt_zfopen(const char *filename, const char *modestr, zoneid_t zoneid,
 369     int *err)
 370 {
 371         int             p[2];
 372         zfoparg_t       zfoparg;
 373         FILE            *fp = NULL;
 374 
 375         if (zoneid != GLOBAL_ZONEID && pipe(p) == -1) {
 376                 *err = errno;
 377                 return (NULL);
 378         }
 379 
 380         zfoparg.zfop_openarg.zopen_modestr = modestr;
 381         zfoparg.zfop_openarg.zopen_pipe = p;
 382         *err = dlmgmt_zfop(filename, zoneid, dlmgmt_zopen_cb, &zfoparg);
 383         if (zoneid != GLOBAL_ZONEID) {
 384                 (void) close(p[0]);
 385                 (void) close(p[1]);
 386         }
 387         if (*err == 0) {
 388                 fp = fdopen(zfoparg.zfop_openarg.zopen_fd, modestr);
 389                 if (fp == NULL) {
 390                         *err = errno;
 391                         (void) close(zfoparg.zfop_openarg.zopen_fd);
 392                 }
 393         }
 394         return (fp);
 395 }
 396 
 397 /*
 398  * Same as rename(2), except that old and new are relative to zoneid's zone
 399  * root.
 400  */
 401 static int
 402 dlmgmt_zrename(const char *old, const char *new, zoneid_t zoneid)
 403 {
 404         zfoparg_t zfoparg;
 405 
 406         zfoparg.zfop_renamearg.zrename_newname = new;
 407         return (dlmgmt_zfop(old, zoneid, dlmgmt_zrename_cb, &zfoparg));
 408 }
 409 
 410 /*
 411  * Same as unlink(2), except that filename is relative to zoneid's zone root.
 412  */
 413 static int
 414 dlmgmt_zunlink(const char *filename, zoneid_t zoneid)
 415 {
 416         return (dlmgmt_zfop(filename, zoneid, dlmgmt_zunlink_cb, NULL));
 417 }
 418 
 419 static size_t
 420 write_str(char *buffer, size_t buffer_length, char *name, void *value)
 421 {
 422         char    *ptr = value;
 423         size_t  data_length = strnlen(ptr, buffer_length);
 424 
 425         /*
 426          * Strings are assumed to be NULL terminated.  In order to fit in
 427          * the buffer, the string's length must be less then buffer_length.
 428          * If the value is empty, there's no point in writing it, in fact,
 429          * we shouldn't even see that case.
 430          */
 431         if (data_length + BASE_PROPERTY_LENGTH(DLADM_TYPE_STR, name) ==
 432             buffer_length || data_length == 0)
 433                 return (0);
 434 
 435         /*
 436          * Since we know the string will fit in the buffer, snprintf will
 437          * always return less than buffer_length, so we can just return
 438          * whatever snprintf returns.
 439          */
 440         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%s",
 441             name, DLADM_TYPE_STR, ptr));
 442 }
 443 
 444 static size_t
 445 write_boolean(char *buffer, size_t buffer_length, char *name, void *value)
 446 {
 447         boolean_t       *ptr = value;
 448 
 449         /*
 450          * Booleans are either zero or one, so we only need room for two
 451          * characters in the buffer.
 452          */
 453         if (buffer_length <= 1 + BASE_PROPERTY_LENGTH(DLADM_TYPE_BOOLEAN, name))
 454                 return (0);
 455 
 456         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%d",
 457             name, DLADM_TYPE_BOOLEAN, *ptr));
 458 }
 459 
 460 static size_t
 461 write_uint64(char *buffer, size_t buffer_length, char *name, void *value)
 462 {
 463         uint64_t        *ptr = value;
 464 
 465         /*
 466          * Limit checking for uint64_t is a little trickier.
 467          */
 468         if (snprintf(NULL, 0, "%lld", *ptr)  +
 469             BASE_PROPERTY_LENGTH(DLADM_TYPE_UINT64, name) >= buffer_length)
 470                 return (0);
 471 
 472         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%lld",
 473             name, DLADM_TYPE_UINT64, *ptr));
 474 }
 475 
 476 static size_t
 477 read_str(char *buffer, void **value)
 478 {
 479         char            *ptr = calloc(MAXLINKATTRVALLEN, sizeof (char));
 480         ssize_t         len;
 481 
 482         if (ptr == NULL || (len = strlcpy(ptr, buffer, MAXLINKATTRVALLEN))
 483             >= MAXLINKATTRVALLEN) {
 484                 free(ptr);
 485                 return (0);
 486         }
 487 
 488         *(char **)value = ptr;
 489 
 490         /* Account for NULL terminator */
 491         return (len + 1);
 492 }
 493 
 494 static size_t
 495 read_boolean(char *buffer, void **value)
 496 {
 497         boolean_t       *ptr = calloc(1, sizeof (boolean_t));
 498 
 499         if (ptr == NULL)
 500                 return (0);
 501 
 502         *ptr = atoi(buffer);
 503         *(boolean_t **)value = ptr;
 504 
 505         return (sizeof (boolean_t));
 506 }
 507 
 508 static size_t
 509 read_int64(char *buffer, void **value)
 510 {
 511         int64_t *ptr = calloc(1, sizeof (int64_t));
 512 
 513         if (ptr == NULL)
 514                 return (0);
 515 
 516         *ptr = (int64_t)atoll(buffer);
 517         *(int64_t **)value = ptr;
 518 
 519         return (sizeof (int64_t));
 520 }
 521 
 522 static dlmgmt_db_req_t *
 523 dlmgmt_db_req_alloc(dlmgmt_db_op_t op, const char *linkname,
 524     datalink_id_t linkid, zoneid_t zoneid, uint32_t flags, int *err)
 525 {
 526         dlmgmt_db_req_t *req;
 527 
 528         if ((req = calloc(1, sizeof (dlmgmt_db_req_t))) == NULL) {
 529                 *err = errno;
 530         } else {
 531                 req->ls_op = op;
 532                 if (linkname != NULL)
 533                         (void) strlcpy(req->ls_link, linkname, MAXLINKNAMELEN);
 534                 req->ls_linkid = linkid;
 535                 req->ls_zoneid = zoneid;
 536                 req->ls_flags = flags;
 537         }
 538         return (req);
 539 }
 540 
 541 /*
 542  * Update the db entry with name "entryname" using information from "linkp".
 543  */
 544 static int
 545 dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
 546     uint32_t flags)
 547 {
 548         dlmgmt_db_req_t *req;
 549         int             err;
 550 
 551         /* It is either a persistent request or an active request, not both. */
 552         assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE));
 553 
 554         if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
 555             linkp->ll_zoneid, flags, &err)) == NULL)
 556                 return (err);
 557 
 558         /* If transient op and onloan, use the global zone cache file. */
 559         if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
 560                 req->ls_zoneid = GLOBAL_ZONEID;
 561 
 562         /*
 563          * If the return error is EINPROGRESS, this request is handled
 564          * asynchronously; return success.
 565          */
 566         err = dlmgmt_process_db_req(req);
 567         if (err != EINPROGRESS)
 568                 free(req);
 569         else
 570                 err = 0;
 571         return (err);
 572 }
 573 
/*
 * Human-readable name of a dlmgmt_db_op_t for log messages.  Anything that
 * is neither a read nor a write is reported as "delete".
 */
#define DLMGMT_DB_OP_STR(op)                                    \
        (((op) == DLMGMT_DB_OP_READ) ? "read" :                 \
        (((op) == DLMGMT_DB_OP_WRITE) ? "write" : "delete"))

/*
 * Human-readable name of the configuration selected by a request's flags
 * for log messages; the empty string if flag is neither DLMGMT_ACTIVE nor
 * DLMGMT_PERSIST.
 */
#define DLMGMT_DB_CONF_STR(flag)                                \
        (((flag) == DLMGMT_ACTIVE) ? "active" :                 \
        (((flag) == DLMGMT_PERSIST) ? "persistent" : ""))
 581 
 582 static int
 583 dlmgmt_process_db_req(dlmgmt_db_req_t *req)
 584 {
 585         pthread_t       tid;
 586         boolean_t       writeop;
 587         int             err;
 588 
 589         /*
 590          * If there are already pending "write" requests, queue this request in
 591          * the pending list.  Note that this function is called while the
 592          * dlmgmt_rw_lock is held, so it is safe to access the global variables.
 593          */
 594         writeop = (req->ls_op != DLMGMT_DB_OP_READ);
 595         if (writeop && (req->ls_flags == DLMGMT_PERSIST) &&
 596             (dlmgmt_db_req_head != NULL)) {
 597                 dlmgmt_db_req_tail->ls_next = req;
 598                 dlmgmt_db_req_tail = req;
 599                 return (EINPROGRESS);
 600         }
 601 
 602         err = dlmgmt_process_db_onereq(req, writeop);
 603         if (err != EINPROGRESS && err != 0 && err != ENOENT) {
 604                 /*
 605                  * Log the error unless the request processing is still in
 606                  * progress or if the configuration file hasn't been created
 607                  * yet (ENOENT).
 608                  */
 609                 dlmgmt_log(LOG_WARNING, "dlmgmt_process_db_onereq() %s "
 610                     "operation on %s configuration failed: %s",
 611                     DLMGMT_DB_OP_STR(req->ls_op),
 612                     DLMGMT_DB_CONF_STR(req->ls_flags), strerror(err));
 613         }
 614 
 615         if (err == EINPROGRESS) {
 616                 assert(req->ls_flags == DLMGMT_PERSIST);
 617                 assert(writeop && dlmgmt_db_req_head == NULL);
 618                 dlmgmt_db_req_tail = dlmgmt_db_req_head = req;
 619                 err = pthread_create(&tid, NULL, dlmgmt_db_update_thread, NULL);
 620                 if (err == 0)
 621                         return (EINPROGRESS);
 622         }
 623         return (err);
 624 }
 625 
 626 static int
 627 dlmgmt_process_db_onereq(dlmgmt_db_req_t *req, boolean_t writeop)
 628 {
 629         int     err = 0;
 630         FILE    *fp, *nfp = NULL;
 631         char    file[MAXPATHLEN];
 632         char    newfile[MAXPATHLEN];
 633 
 634         DLMGMT_MAKE_FILE_DB_PATH(file, (req->ls_flags == DLMGMT_PERSIST));
 635         fp = dlmgmt_zfopen(file, "r", req->ls_zoneid, &err);
 636         /*
 637          * Note that it is not an error if the file doesn't exist.  If we're
 638          * reading, we treat this case the same way as an empty file.  If
 639          * we're writing, the file will be created when we open the file for
 640          * writing below.
 641          */
 642         if (fp == NULL && !writeop)
 643                 return (err);
 644 
 645         if (writeop) {
 646                 (void) snprintf(newfile, MAXPATHLEN, "%s.new", file);
 647                 nfp = dlmgmt_zfopen(newfile, "w", req->ls_zoneid, &err);
 648                 if (nfp == NULL) {
 649                         /*
 650                          * EROFS can happen at boot when the file system is
 651                          * read-only.  Return EINPROGRESS so that the caller
 652                          * can add this request to the pending request list
 653                          * and start a retry thread.
 654                          */
 655                         err = (errno == EROFS ? EINPROGRESS : errno);
 656                         goto done;
 657                 }
 658         }
 659         if (writeop) {
 660                 if ((err = process_db_write(req, fp, nfp)) == 0)
 661                         err = dlmgmt_zrename(newfile, file, req->ls_zoneid);
 662         } else {
 663                 err = process_db_read(req, fp);
 664         }
 665 
 666 done:
 667         if (nfp != NULL) {
 668                 (void) fclose(nfp);
 669                 if (err != 0)
 670                         (void) dlmgmt_zunlink(newfile, req->ls_zoneid);
 671         }
 672         (void) fclose(fp);
 673         return (err);
 674 }
 675 
 676 /*ARGSUSED*/
 677 static void *
 678 dlmgmt_db_update_thread(void *arg)
 679 {
 680         dlmgmt_db_req_t *req;
 681 
 682         dlmgmt_table_lock(B_TRUE);
 683 
 684         assert(dlmgmt_db_req_head != NULL);
 685         while ((req = dlmgmt_db_req_head) != NULL) {
 686                 assert(req->ls_flags == DLMGMT_PERSIST);
 687                 if (dlmgmt_process_db_onereq(req, B_TRUE) == EINPROGRESS) {
 688                         /*
 689                          * The filesystem is still read only. Go to sleep and
 690                          * try again.
 691                          */
 692                         dlmgmt_table_unlock();
 693                         (void) sleep(5);
 694                         dlmgmt_table_lock(B_TRUE);
 695                         continue;
 696                 }
 697 
 698                 /*
 699                  * The filesystem is no longer read only. Continue processing
 700                  * and remove the request from the pending list.
 701                  */
 702                 dlmgmt_db_req_head = req->ls_next;
 703                 if (dlmgmt_db_req_tail == req) {
 704                         assert(dlmgmt_db_req_head == NULL);
 705                         dlmgmt_db_req_tail = NULL;
 706                 }
 707                 free(req);
 708         }
 709 
 710         dlmgmt_table_unlock();
 711         return (NULL);
 712 }
 713 
 714 static int
 715 parse_linkprops(char *buf, dlmgmt_link_t *linkp)
 716 {
 717         boolean_t               found_type = B_FALSE;
 718         dladm_datatype_t        type = DLADM_TYPE_STR;
 719         int                     i, len;
 720         char                    *curr;
 721         char                    attr_name[MAXLINKATTRLEN];
 722         size_t                  attr_buf_len = 0;
 723         void                    *attr_buf = NULL;
 724         boolean_t               rename;
 725 
 726         curr = buf;
 727         len = strlen(buf);
 728         attr_name[0] = '\0';
 729         for (i = 0; i < len; i++) {
 730                 rename = B_FALSE;
 731                 char            c = buf[i];
 732                 boolean_t       match = (c == '=' ||
 733                     (c == ',' && !found_type) || c == ';');
 734 
 735                 /*
 736                  * Move to the next character if there is no match and
 737                  * if we have not reached the last character.
 738                  */
 739                 if (!match && i != len - 1)
 740                         continue;
 741 
 742                 if (match) {
 743                         /*
 744                          * NUL-terminate the string pointed to by 'curr'.
 745                          */
 746                         buf[i] = '\0';
 747                         if (*curr == '\0')
 748                                 goto parse_fail;
 749                 }
 750 
 751                 if (attr_name[0] != '\0' && found_type) {
 752                         /*
 753                          * We get here after we have processed the "<prop>="
 754                          * pattern. The pattern we are now interested in is
 755                          * "<val>;".
 756                          */
 757                         if (c == '=')
 758                                 goto parse_fail;
 759 
 760                         if (strcmp(attr_name, "linkid") == 0) {
 761                                 if (read_int64(curr, &attr_buf) == 0)
 762                                         goto parse_fail;
 763                                 linkp->ll_linkid =
 764                                     (datalink_class_t)*(int64_t *)attr_buf;
 765                         } else if (strcmp(attr_name, "name") == 0) {
 766                                 if (read_str(curr, &attr_buf) == 0)
 767                                         goto parse_fail;
 768                                 (void) snprintf(linkp->ll_link,
 769                                     MAXLINKNAMELEN, "%s", attr_buf);
 770                         } else if (strcmp(attr_name, "class") == 0) {
 771                                 if (read_int64(curr, &attr_buf) == 0)
 772                                         goto parse_fail;
 773                                 linkp->ll_class =
 774                                     (datalink_class_t)*(int64_t *)attr_buf;
 775                         } else if (strcmp(attr_name, "media") == 0) {
 776                                 if (read_int64(curr, &attr_buf) == 0)
 777                                         goto parse_fail;
 778                                 linkp->ll_media =
 779                                     (uint32_t)*(int64_t *)attr_buf;
 780                         } else if (strcmp(attr_name, "zone") == 0) {
 781                                 if (read_str(curr, &attr_buf) == 0)
 782                                         goto parse_fail;
 783                                 linkp->ll_zoneid = getzoneidbyname(attr_buf);
 784                                 if (linkp->ll_zoneid == -1) {
 785                                         if (errno == EFAULT)
 786                                                 abort();
 787                                         /*
 788                                          * If we can't find the zone, assign the
 789                                          * link to the GZ and mark it for being
 790                                          * renamed.
 791                                          */
 792                                         linkp->ll_zoneid = 0;
 793                                         rename = B_TRUE;
 794                                 }
 795                         } else {
 796                                 attr_buf_len = translators[type].read_func(curr,
 797                                     &attr_buf);
 798                                 if (attr_buf_len == 0)
 799                                         goto parse_fail;
 800 
 801                                 if (linkattr_set(&(linkp->ll_head), attr_name,
 802                                     attr_buf, attr_buf_len, type) != 0) {
 803                                         free(attr_buf);
 804                                         goto parse_fail;
 805                                 }
 806                         }
 807 
 808                         free(attr_buf);
 809                         attr_name[0] = '\0';
 810                         found_type = B_FALSE;
 811                 } else if (attr_name[0] != '\0') {
 812                         /*
 813                          * Non-zero length attr_name and found_type of false
 814                          * indicates that we have not found the type for this
 815                          * attribute.  The pattern now is "<type>,<val>;", we
 816                          * want the <type> part of the pattern.
 817                          */
 818                         for (type = 0; type < ntranslators; type++) {
 819                                 if (strcmp(curr,
 820                                     translators[type].type_name) == 0) {
 821                                         found_type = B_TRUE;
 822                                         break;
 823                                 }
 824                         }
 825 
 826                         if (!found_type)
 827                                 goto parse_fail;
 828                 } else {
 829                         /*
 830                          * A zero length attr_name indicates we are looking
 831                          * at the beginning of a link attribute.
 832                          */
 833                         if (c != '=')
 834                                 goto parse_fail;
 835 
 836                         (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
 837                 }
 838 
 839                 /*
 840                  * The zone that this link belongs to has died, we are
 841                  * reparenting it to the GZ and renaming it to avoid name
 842                  * collisions.
 843                  */
 844                 if (rename == B_TRUE) {
 845                         (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
 846                             "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
 847                 }
 848                 curr = buf + i + 1;
 849         }
 850 
 851         /* Correct any erroneous IPTUN datalink class constant in the file */
 852         if (linkp->ll_class == 0x60) {
 853                 linkp->ll_class = DATALINK_CLASS_IPTUN;
 854                 rewrite_needed = B_TRUE;
 855         }
 856 
 857         return (0);
 858 
 859 parse_fail:
 860         /*
 861          * Free linkp->ll_head (link attribute list)
 862          */
 863         linkattr_destroy(linkp);
 864         return (-1);
 865 }
 866 
 867 static boolean_t
 868 process_link_line(char *buf, dlmgmt_link_t *linkp)
 869 {
 870         int     i, len, llen;
 871         char    *str, *lasts;
 872         char    tmpbuf[MAXLINELEN];
 873 
 874         bzero(linkp, sizeof (*linkp));
 875         linkp->ll_linkid = DATALINK_INVALID_LINKID;
 876 
 877         /*
 878          * Use a copy of buf for parsing so that we can do whatever we want.
 879          */
 880         (void) strlcpy(tmpbuf, buf, MAXLINELEN);
 881 
 882         /*
 883          * Skip leading spaces, blank lines, and comments.
 884          */
 885         len = strlen(tmpbuf);
 886         for (i = 0; i < len; i++) {
 887                 if (!isspace(tmpbuf[i]))
 888                         break;
 889         }
 890         if (i == len || tmpbuf[i] == '#')
 891                 return (B_TRUE);
 892 
 893         str = tmpbuf + i;
 894         /*
 895          * Find the link name and assign it to the link structure.
 896          */
 897         if (strtok_r(str, " \n\t", &lasts) == NULL)
 898                 goto fail;
 899 
 900         llen = strlen(str);
 901         /*
 902          * Note that a previous version of the persistent datalink.conf file
 903          * stored the linkid as the first field.  In that case, the name will
 904          * be obtained through parse_linkprops from a property with the format
 905          * "name=<linkname>".  If we encounter such a format, we set
 906          * rewrite_needed so that dlmgmt_db_init() can rewrite the file with
 907          * the new format after it's done reading in the data.
 908          */
 909         if (isdigit(str[0])) {
 910                 linkp->ll_linkid = atoi(str);
 911                 rewrite_needed = B_TRUE;
 912         } else {
 913                 if (strlcpy(linkp->ll_link, str, sizeof (linkp->ll_link)) >=
 914                     sizeof (linkp->ll_link))
 915                         goto fail;
 916         }
 917 
 918         str += llen + 1;
 919         if (str >= tmpbuf + len)
 920                 goto fail;
 921 
 922         /*
 923          * Now find the list of link properties.
 924          */
 925         if ((str = strtok_r(str, " \n\t", &lasts)) == NULL)
 926                 goto fail;
 927 
 928         if (parse_linkprops(str, linkp) < 0)
 929                 goto fail;
 930 
 931         return (B_TRUE);
 932 
 933 fail:
 934         /*
 935          * Delete corrupted line.
 936          */
 937         buf[0] = '\0';
 938         return (B_FALSE);
 939 }
 940 
 941 /*
 942  * Find any properties in linkp that refer to "old", and rename to "new".
 943  * Return B_TRUE if any renaming occurred.
 944  */
 945 static int
 946 dlmgmt_attr_rename(dlmgmt_link_t *linkp, const char *old, const char *new,
 947     boolean_t *renamed)
 948 {
 949         dlmgmt_linkattr_t       *attrp;
 950         char                    *newval = NULL, *pname;
 951         char                    valcp[MAXLINKATTRVALLEN];
 952         size_t                  newsize;
 953 
 954         *renamed = B_FALSE;
 955 
 956         if ((attrp = linkattr_find(linkp->ll_head, "linkover")) != NULL ||
 957             (attrp = linkattr_find(linkp->ll_head, "simnetpeer")) != NULL) {
 958                 if (strcmp(old, (char *)attrp->lp_val) == 0) {
 959                         newsize = strlen(new) + 1;
 960                         if ((newval = malloc(newsize)) == NULL)
 961                                 return (errno);
 962                         (void) strcpy(newval, new);
 963                         free(attrp->lp_val);
 964                         attrp->lp_val = newval;
 965                         attrp->lp_sz = newsize;
 966                         *renamed = B_TRUE;
 967                 }
 968                 return (0);
 969         }
 970 
 971         if ((attrp = linkattr_find(linkp->ll_head, "portnames")) == NULL)
 972                 return (0);
 973 
 974         /* <linkname>:[<linkname>:]... */
 975         if ((newval = calloc(MAXLINKATTRVALLEN, sizeof (char))) == NULL)
 976                 return (errno);
 977 
 978         bcopy(attrp->lp_val, valcp, sizeof (valcp));
 979         pname = strtok(valcp, ":");
 980         while (pname != NULL) {
 981                 if (strcmp(pname, old) == 0) {
 982                         (void) strcat(newval, new);
 983                         *renamed = B_TRUE;
 984                 } else {
 985                         (void) strcat(newval, pname);
 986                 }
 987                 (void) strcat(newval, ":");
 988                 pname = strtok(NULL, ":");
 989         }
 990         if (*renamed) {
 991                 free(attrp->lp_val);
 992                 attrp->lp_val = newval;
 993                 attrp->lp_sz = strlen(newval) + 1;
 994         } else {
 995                 free(newval);
 996         }
 997         return (0);
 998 }
 999 
1000 static int
1001 process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp)
1002 {
1003         boolean_t               done = B_FALSE;
1004         int                     err = 0;
1005         dlmgmt_link_t           link_in_file, *linkp = NULL, *dblinkp;
1006         boolean_t               persist = (req->ls_flags == DLMGMT_PERSIST);
1007         boolean_t               writeall, rename, attr_renamed;
1008         char                    buf[MAXLINELEN];
1009 
1010         writeall = (req->ls_linkid == DATALINK_ALL_LINKID);
1011 
1012         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall) {
1013                 /*
1014                  * find the link in the avl tree with the given linkid.
1015                  */
1016                 linkp = link_by_id(req->ls_linkid, req->ls_zoneid);
1017                 if (linkp == NULL || (linkp->ll_flags & req->ls_flags) == 0) {
1018                         /*
1019                          * This link has already been changed. This could
1020                          * happen if the request is pending because of
1021                          * read-only file-system. If so, we are done.
1022                          */
1023                         return (0);
1024                 }
1025                 /*
1026                  * In the case of a rename, linkp's name has been updated to
1027                  * the new name, and req->ls_link is the old link name.
1028                  */
1029                 rename = (strcmp(req->ls_link, linkp->ll_link) != 0);
1030         }
1031 
1032         /*
1033          * fp can be NULL if the file didn't initially exist and we're
1034          * creating it as part of this write operation.
1035          */
1036         if (fp == NULL)
1037                 goto write;
1038 
1039         while (err == 0 && fgets(buf, sizeof (buf), fp) != NULL &&
1040             process_link_line(buf, &link_in_file)) {
1041                 /*
1042                  * Only the link name is needed. Free the memory allocated for
1043                  * the link attributes list of link_in_file.
1044                  */
1045                 linkattr_destroy(&link_in_file);
1046 
1047                 if (link_in_file.ll_link[0] == '\0' || done) {
1048                         /*
1049                          * this is a comment line or we are done updating the
1050                          * line for the specified link, write the rest of
1051                          * lines out.
1052                          */
1053                         if (fputs(buf, nfp) == EOF)
1054                                 err = errno;
1055                         continue;
1056                 }
1057 
1058                 switch (req->ls_op) {
1059                 case DLMGMT_DB_OP_WRITE:
1060                         /*
1061                          * For write operations, we generate a new output line
1062                          * if we're either writing all links (writeall) or if
1063                          * the name of the link in the file matches the one
1064                          * we're looking for.  Otherwise, we write out the
1065                          * buffer as-is.
1066                          *
1067                          * If we're doing a rename operation, ensure that any
1068                          * references to the link being renamed in link
1069                          * properties are also updated before we write
1070                          * anything.
1071                          */
1072                         if (writeall) {
1073                                 linkp = link_by_name(link_in_file.ll_link,
1074                                     req->ls_zoneid);
1075                         }
1076                         if (writeall || strcmp(req->ls_link,
1077                             link_in_file.ll_link) == 0) {
1078                                 generate_link_line(linkp, persist, buf);
1079                                 if (!writeall && !rename)
1080                                         done = B_TRUE;
1081                         } else if (rename && persist) {
1082                                 dblinkp = link_by_name(link_in_file.ll_link,
1083                                     req->ls_zoneid);
1084                                 err = dlmgmt_attr_rename(dblinkp, req->ls_link,
1085                                     linkp->ll_link, &attr_renamed);
1086                                 if (err != 0)
1087                                         break;
1088                                 if (attr_renamed) {
1089                                         generate_link_line(dblinkp, persist,
1090                                             buf);
1091                                 }
1092                         }
1093                         if (fputs(buf, nfp) == EOF)
1094                                 err = errno;
1095                         break;
1096                 case DLMGMT_DB_OP_DELETE:
1097                         /*
1098                          * Delete is simple.  If buf does not represent the
1099                          * link we're deleting, write it out.
1100                          */
1101                         if (strcmp(req->ls_link, link_in_file.ll_link) != 0) {
1102                                 if (fputs(buf, nfp) == EOF)
1103                                         err = errno;
1104                         } else {
1105                                 done = B_TRUE;
1106                         }
1107                         break;
1108                 case DLMGMT_DB_OP_READ:
1109                 default:
1110                         err = EINVAL;
1111                         break;
1112                 }
1113         }
1114 
1115 write:
1116         /*
1117          * If we get to the end of the file and have not seen what linkid
1118          * points to, write it out then.
1119          */
1120         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall && !rename && !done) {
1121                 generate_link_line(linkp, persist, buf);
1122                 done = B_TRUE;
1123                 if (fputs(buf, nfp) == EOF)
1124                         err = errno;
1125         }
1126 
1127         return (err);
1128 }
1129 
1130 static int
1131 process_db_read(dlmgmt_db_req_t *req, FILE *fp)
1132 {
1133         avl_index_t     name_where, id_where;
1134         dlmgmt_link_t   link_in_file, *newlink, *link_in_db;
1135         char            buf[MAXLINELEN];
1136         int             err = 0;
1137 
1138         /*
1139          * This loop processes each line of the configuration file.
1140          */
1141         while (fgets(buf, MAXLINELEN, fp) != NULL) {
1142                 if (!process_link_line(buf, &link_in_file)) {
1143                         err = EINVAL;
1144                         break;
1145                 }
1146 
1147                 /*
1148                  * Skip the comment line.
1149                  */
1150                 if (link_in_file.ll_link[0] == '\0') {
1151                         linkattr_destroy(&link_in_file);
1152                         continue;
1153                 }
1154 
1155                 if ((req->ls_flags & DLMGMT_ACTIVE) &&
1156                     link_in_file.ll_linkid == DATALINK_INVALID_LINKID) {
1157                         linkattr_destroy(&link_in_file);
1158                         continue;
1159                 }
1160 
1161                 link_in_file.ll_zoneid = req->ls_zoneid;
1162                 link_in_db = link_by_name(link_in_file.ll_link,
1163                     link_in_file.ll_zoneid);
1164                 if (link_in_db != NULL) {
1165                         /*
1166                          * If the link in the database already has the flag
1167                          * for this request set, then the entry is a
1168                          * duplicate.  If it's not a duplicate, then simply
1169                          * turn on the appropriate flag on the existing link.
1170                          */
1171                         if (link_in_db->ll_flags & req->ls_flags) {
1172                                 dlmgmt_log(LOG_WARNING, "Duplicate links "
1173                                     "in the repository: %s",
1174                                     link_in_file.ll_link);
1175                                 linkattr_destroy(&link_in_file);
1176                         } else {
1177                                 if (req->ls_flags & DLMGMT_PERSIST) {
1178                                         /*
1179                                          * Save the newly read properties into
1180                                          * the existing link.
1181                                          */
1182                                         assert(link_in_db->ll_head == NULL);
1183                                         link_in_db->ll_head =
1184                                             link_in_file.ll_head;
1185                                 } else {
1186                                         linkattr_destroy(&link_in_file);
1187                                 }
1188                                 link_in_db->ll_flags |= req->ls_flags;
1189                         }
1190                 } else {
1191                         /*
1192                          * This is a new link.  Allocate a new dlmgmt_link_t
1193                          * and add it to the trees.
1194                          */
1195                         newlink = calloc(1, sizeof (*newlink));
1196                         if (newlink == NULL) {
1197                                 dlmgmt_log(LOG_WARNING, "Unable to allocate "
1198                                     "memory to create new link %s",
1199                                     link_in_file.ll_link);
1200                                 linkattr_destroy(&link_in_file);
1201                                 continue;
1202                         }
1203                         bcopy(&link_in_file, newlink, sizeof (*newlink));
1204 
1205                         if (newlink->ll_linkid == DATALINK_INVALID_LINKID)
1206                                 newlink->ll_linkid = dlmgmt_nextlinkid;
1207                         if (avl_find(&dlmgmt_id_avl, newlink, &id_where) !=
1208                             NULL) {
1209                                 dlmgmt_log(LOG_WARNING, "Link ID %d is already"
1210                                     " in use, destroying link %s",
1211                                     newlink->ll_linkid, newlink->ll_link);
1212                                 link_destroy(newlink);
1213                                 continue;
1214                         }
1215 
1216                         if ((req->ls_flags & DLMGMT_ACTIVE) &&
1217                             link_activate(newlink) != 0) {
1218                                 dlmgmt_log(LOG_WARNING, "Unable to activate %s",
1219                                     newlink->ll_link);
1220                                 link_destroy(newlink);
1221                                 continue;
1222                         }
1223 
1224                         avl_insert(&dlmgmt_id_avl, newlink, id_where);
1225                         /*
1226                          * link_activate call above can insert newlink in
1227                          * dlmgmt_name_avl tree when activating a link that is
1228                          * assigned to a NGZ.
1229                          */
1230                         if (avl_find(&dlmgmt_name_avl, newlink,
1231                             &name_where) == NULL)
1232                                 avl_insert(&dlmgmt_name_avl, newlink,
1233                                     name_where);
1234 
1235                         dlmgmt_advance(newlink);
1236                         newlink->ll_flags |= req->ls_flags;
1237                 }
1238         }
1239 
1240         return (err);
1241 }
1242 
1243 /*
1244  * Generate an entry in the link database.
1245  * Each entry has this format:
1246  * <link name>    <prop0>=<type>,<val>;...;<propn>=<type>,<val>;
1247  */
1248 static void
1249 generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1250 {
1251         char                    tmpbuf[MAXLINELEN];
1252         char                    *ptr = tmpbuf;
1253         char                    *lim = tmpbuf + MAXLINELEN;
1254         dlmgmt_linkattr_t       *cur_p = NULL;
1255         uint64_t                u64;
1256 
1257         ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1258         if (!persist) {
1259                 char zname[ZONENAME_MAX];
1260                 /*
1261                  * We store the linkid and the zone name in the active database
1262                  * so that dlmgmtd can recover in the event that it is
1263                  * restarted.
1264                  */
1265                 u64 = linkp->ll_linkid;
1266                 ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
1267 
1268                 if (getzonenamebyid(linkp->ll_zoneid, zname,
1269                     sizeof (zname)) != -1) {
1270                         ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
1271                 }
1272         }
1273         u64 = linkp->ll_class;
1274         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1275         u64 = linkp->ll_media;
1276         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1277 
1278         /*
1279          * The daemon does not keep any active link attribute. Only store the
1280          * attributes if this request is for persistent configuration,
1281          */
1282         if (persist) {
1283                 for (cur_p = linkp->ll_head; cur_p != NULL;
1284                     cur_p = cur_p->lp_next) {
1285                         ptr += translators[cur_p->lp_type].write_func(ptr,
1286                             BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val);
1287                 }
1288         }
1289 
1290         if (ptr <= lim)
1291                 (void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf);
1292 }
1293 
1294 int
1295 dlmgmt_delete_db_entry(dlmgmt_link_t *linkp, uint32_t flags)
1296 {
1297         return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkp->ll_link, linkp,
1298             flags));
1299 }
1300 
1301 int
1302 dlmgmt_write_db_entry(const char *entryname, dlmgmt_link_t *linkp,
1303     uint32_t flags)
1304 {
1305         int err;
1306 
1307         if (flags & DLMGMT_PERSIST) {
1308                 if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1309                     linkp, DLMGMT_PERSIST)) != 0) {
1310                         return (err);
1311                 }
1312         }
1313 
1314         if (flags & DLMGMT_ACTIVE) {
1315                 if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1316                     linkp, DLMGMT_ACTIVE)) != 0) && (flags & DLMGMT_PERSIST)) {
1317                         (void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, entryname,
1318                             linkp, DLMGMT_PERSIST);
1319                         return (err);
1320                 }
1321         }
1322 
1323         return (0);
1324 }
1325 
1326 /*
1327  * Upgrade properties that have link IDs as values to link names.  Because '.'
1328  * is a valid linkname character, the port separater for link aggregations
1329  * must be changed to ':'.
1330  */
1331 static void
1332 linkattr_upgrade(dlmgmt_linkattr_t *attrp)
1333 {
1334         datalink_id_t   linkid;
1335         char            *portidstr;
1336         char            portname[MAXLINKNAMELEN + 1];
1337         dlmgmt_link_t   *linkp;
1338         char            *new_attr_val;
1339         size_t          new_attr_sz;
1340         boolean_t       upgraded = B_FALSE;
1341 
1342         if (strcmp(attrp->lp_name, "linkover") == 0 ||
1343             strcmp(attrp->lp_name, "simnetpeer") == 0) {
1344                 if (attrp->lp_type == DLADM_TYPE_UINT64) {
1345                         linkid = (datalink_id_t)*(uint64_t *)attrp->lp_val;
1346                         if ((linkp = link_by_id(linkid, GLOBAL_ZONEID)) == NULL)
1347                                 return;
1348                         new_attr_sz = strlen(linkp->ll_link) + 1;
1349                         if ((new_attr_val = malloc(new_attr_sz)) == NULL)
1350                                 return;
1351                         (void) strcpy(new_attr_val, linkp->ll_link);
1352                         upgraded = B_TRUE;
1353                 }
1354         } else if (strcmp(attrp->lp_name, "portnames") == 0) {
1355                 /*
1356                  * The old format for "portnames" was
1357                  * "<linkid>.[<linkid>.]...".  The new format is
1358                  * "<linkname>:[<linkname>:]...".
1359                  */
1360                 if (!isdigit(((char *)attrp->lp_val)[0]))
1361                         return;
1362                 new_attr_val = calloc(MAXLINKATTRVALLEN, sizeof (char));
1363                 if (new_attr_val == NULL)
1364                         return;
1365                 portidstr = (char *)attrp->lp_val;
1366                 while (*portidstr != '\0') {
1367                         errno = 0;
1368                         linkid = strtol(portidstr, &portidstr, 10);
1369                         if (linkid == 0 || *portidstr != '.' ||
1370                             (linkp = link_by_id(linkid, GLOBAL_ZONEID)) ==
1371                             NULL) {
1372                                 free(new_attr_val);
1373                                 return;
1374                         }
1375                         (void) snprintf(portname, sizeof (portname), "%s:",
1376                             linkp->ll_link);
1377                         if (strlcat(new_attr_val, portname,
1378                             MAXLINKATTRVALLEN) >= MAXLINKATTRVALLEN) {
1379                                 free(new_attr_val);
1380                                 return;
1381                         }
1382                         /* skip the '.' delimiter */
1383                         portidstr++;
1384                 }
1385                 new_attr_sz = strlen(new_attr_val) + 1;
1386                 upgraded = B_TRUE;
1387         }
1388 
1389         if (upgraded) {
1390                 attrp->lp_type = DLADM_TYPE_STR;
1391                 attrp->lp_sz = new_attr_sz;
1392                 free(attrp->lp_val);
1393                 attrp->lp_val = new_attr_val;
1394         }
1395 }
1396 
1397 static void
1398 dlmgmt_db_upgrade(dlmgmt_link_t *linkp)
1399 {
1400         dlmgmt_linkattr_t *attrp;
1401 
1402         for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next)
1403                 linkattr_upgrade(attrp);
1404 }
1405 
1406 static void
1407 dlmgmt_db_phys_activate(dlmgmt_link_t *linkp)
1408 {
1409         linkp->ll_flags |= DLMGMT_ACTIVE;
1410         (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_ACTIVE);
1411 }
1412 
1413 static void
1414 dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
1415 {
1416         dlmgmt_link_t *linkp;
1417 
1418         for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1419             linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1420                 if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1421                         func(linkp);
1422         }
1423 }
1424 
1425 /*
1426  * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
1427  *
1428  * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
1429  * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
1430  * Because of the upcall architecture of dlmgmtd this can lead to deadlock
1431  * with the following scenario:
1432  *    a) the thread preparing to fork will have acquired the malloc locks
1433  *       then attempt to suspend every thread in preparation to fork.
1434  *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
1435  *       and get the credentials of their caller.
1436  *    c) we can't suspend the in-kernel thread making the upcall.
1437  *
1438  * Thus, we cannot serve door requests because we're blocked in malloc()
1439  * which fork() owns, but fork() is in turn blocked on the in-kernel thread
1440  * making the door upcall.  This is a fundamental architectural problem with
1441  * any server handling upcalls and also trying to fork().
1442  *
1443  * To minimize the chance of this deadlock occuring, we check ahead of time to
1444  * see if the file we want to read actually exists in the zone (which it almost
1445  * never does), so we don't need fork in that case (i.e. rarely to never).
1446  */
1447 static boolean_t
1448 zone_file_exists(char *zoneroot, char *filename)
1449 {
1450         struct stat     sb;
1451         char            fname[MAXPATHLEN];
1452 
1453         (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
1454 
1455         if (stat(fname, &sb) == -1)
1456                 return (B_FALSE);
1457 
1458         return (B_TRUE);
1459 }
1460 
1461 /*
1462  * Initialize the datalink <link name, linkid> mapping and the link's
1463  * attributes list based on the configuration file /etc/dladm/datalink.conf
1464  * and the active configuration cache file
1465  * /etc/svc/volatile/dladm/datalink-management:default.cache.
1466  */
1467 int
1468 dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1469 {
1470         dlmgmt_db_req_t *req;
1471         int             err;
1472         boolean_t       boot = B_FALSE;
1473         char            tdir[MAXPATHLEN];
1474         char            *path = cachefile;
1475 
1476         if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1477             DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1478                 return (err);
1479 
1480         /* Handle running in a non-native branded zone (i.e. has /native) */
1481         if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
1482                 (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
1483                 path = tdir;
1484         }
1485 
1486         if (zone_file_exists(zoneroot, path)) {
1487                 if ((err = dlmgmt_process_db_req(req)) != 0) {
1488                         /*
1489                          * If we get back ENOENT, that means that the active
1490                          * configuration file doesn't exist yet, and is not an
1491                          * error.  We'll create it down below after we've
1492                          * loaded the persistent configuration.
1493                          */
1494                         if (err != ENOENT)
1495                                 goto done;
1496                         boot = B_TRUE;
1497                 }
1498         } else {
1499                 boot = B_TRUE;
1500         }
1501 
1502         if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
1503                 req->ls_flags = DLMGMT_PERSIST;
1504                 err = dlmgmt_process_db_req(req);
1505                 if (err != 0 && err != ENOENT)
1506                         goto done;
1507         }
1508         err = 0;
1509         if (rewrite_needed) {
1510                 /*
1511                  * First update links in memory, then dump the entire db to
1512                  * disk.
1513                  */
1514                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade);
1515                 req->ls_op = DLMGMT_DB_OP_WRITE;
1516                 req->ls_linkid = DATALINK_ALL_LINKID;
1517                 if ((err = dlmgmt_process_db_req(req)) != 0 &&
1518                     err != EINPROGRESS)
1519                         goto done;
1520         }
1521         if (boot) {
1522                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_PHYS,
1523                     dlmgmt_db_phys_activate);
1524         }
1525 
1526 done:
1527         if (err == EINPROGRESS)
1528                 err = 0;
1529         else
1530                 free(req);
1531         return (err);
1532 }
1533 
1534 /*
1535  * Remove all links in the given zoneid.
1536  *
1537  * We do this work in two different passes. In the first pass, we remove any
1538  * entry that hasn't been loaned and mark every entry that has been loaned as
1539  * something that is going to be tombstomed. In the second pass, we drop the
1540  * table lock for every entry and remove the tombstombed entry for our zone.
1541  */
1542 void
1543 dlmgmt_db_fini(zoneid_t zoneid)
1544 {
1545         dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1546 
1547         while (linkp != NULL) {
1548                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1549                 if (linkp->ll_zoneid == zoneid) {
1550                         boolean_t onloan = linkp->ll_onloan;
1551 
1552                         /*
1553                          * Cleanup any VNICs that were loaned to the zone
1554                          * before the zone goes away and we can no longer
1555                          * refer to the VNIC by the name/zoneid.
1556                          */
1557                         if (onloan) {
1558                                 (void) dlmgmt_delete_db_entry(linkp,
1559                                     DLMGMT_ACTIVE);
1560                                 linkp->ll_tomb = B_TRUE;
1561                         } else {
1562                                 (void) dlmgmt_destroy_common(linkp,
1563                                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1564                         }
1565 
1566                 }
1567                 linkp = next_linkp;
1568         }
1569 
1570 again:
1571         linkp = avl_first(&dlmgmt_name_avl);
1572         while (linkp != NULL) {
1573                 vnic_ioc_delete_t ioc;
1574 
1575                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1576 
1577                 if (linkp->ll_zoneid != zoneid) {
1578                         linkp = next_linkp;
1579                         continue;
1580                 }
1581                 ioc.vd_vnic_id = linkp->ll_linkid;
1582                 if (linkp->ll_tomb != B_TRUE)
1583                         abort();
1584 
1585                 /*
1586                  * We have to drop the table lock while going up into the
1587                  * kernel. If we hold the table lock while deleting a vnic, we
1588                  * may get blocked on the mac perimeter and the holder of it may
1589                  * want something from dlmgmtd.
1590                  */
1591                 dlmgmt_table_unlock();
1592 
1593                 if (ioctl(dladm_dld_fd(dld_handle),
1594                     VNIC_IOC_DELETE, &ioc) < 0)
1595                         dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
1596                             "delete VNIC ioctl failed %d %d",
1597                             ioc.vd_vnic_id, errno);
1598 
1599                 /*
1600                  * Even though we've dropped the lock, we know that nothing else
1601                  * could have removed us. Therefore, it should be safe to go
1602                  * through and delete ourselves, but do nothing else. We'll have
1603                  * to restart iteration from the beginning. This can be painful.
1604                  */
1605                 dlmgmt_table_lock(B_TRUE);
1606 
1607                 (void) dlmgmt_destroy_common(linkp,
1608                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1609                 goto again;
1610         }
1611 
1612 }