1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2014, Joyent Inc. All rights reserved.
  25  */
  26 
  27 #include <assert.h>
  28 #include <ctype.h>
  29 #include <errno.h>
  30 #include <fcntl.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <strings.h>
  35 #include <syslog.h>
  36 #include <zone.h>
  37 #include <sys/types.h>
  38 #include <sys/stat.h>
  39 #include <stropts.h>
  40 #include <sys/conf.h>
  41 #include <pthread.h>
  42 #include <unistd.h>
  43 #include <wait.h>
  44 #include <libcontract.h>
  45 #include <libcontract_priv.h>
  46 #include <sys/contract/process.h>
  47 #include <sys/vnic.h>
  48 #include <zone.h>
  49 #include "dlmgmt_impl.h"
  50 
  51 typedef enum dlmgmt_db_op {
  52         DLMGMT_DB_OP_WRITE,
  53         DLMGMT_DB_OP_DELETE,
  54         DLMGMT_DB_OP_READ
  55 } dlmgmt_db_op_t;
  56 
  57 typedef struct dlmgmt_db_req_s {
  58         struct dlmgmt_db_req_s  *ls_next;
  59         dlmgmt_db_op_t          ls_op;
  60         char                    ls_link[MAXLINKNAMELEN];
  61         datalink_id_t           ls_linkid;
  62         zoneid_t                ls_zoneid;
  63         uint32_t                ls_flags;       /* Either DLMGMT_ACTIVE or   */
  64                                                 /* DLMGMT_PERSIST, not both. */
  65 } dlmgmt_db_req_t;
  66 
  67 /*
  68  * List of pending db updates (e.g., because of a read-only filesystem).
  69  */
  70 static dlmgmt_db_req_t  *dlmgmt_db_req_head = NULL;
  71 static dlmgmt_db_req_t  *dlmgmt_db_req_tail = NULL;
  72 
  73 /*
  74  * rewrite_needed is set to B_TRUE by process_link_line() if it encounters a
  75  * line with an old format.  This will cause the file being read to be
  76  * re-written with the current format.
  77  */
  78 static boolean_t        rewrite_needed;
  79 
  80 static int              dlmgmt_db_update(dlmgmt_db_op_t, const char *,
  81                             dlmgmt_link_t *, uint32_t);
  82 static int              dlmgmt_process_db_req(dlmgmt_db_req_t *);
  83 static int              dlmgmt_process_db_onereq(dlmgmt_db_req_t *, boolean_t);
  84 static void             *dlmgmt_db_update_thread(void *);
  85 static boolean_t        process_link_line(char *, dlmgmt_link_t *);
  86 static int              process_db_write(dlmgmt_db_req_t *, FILE *, FILE *);
  87 static int              process_db_read(dlmgmt_db_req_t *, FILE *);
  88 static void             generate_link_line(dlmgmt_link_t *, boolean_t, char *);
  89 
  90 #define BUFLEN(lim, ptr)        (((lim) > (ptr)) ? ((lim) - (ptr)) : 0)
  91 #define MAXLINELEN              1024
  92 
  93 typedef void db_walk_func_t(dlmgmt_link_t *);
  94 
  95 /*
  96  * Translator functions to go from dladm_datatype_t to character strings.
  97  * Each function takes a pointer to a buffer, the size of the buffer,
  98  * the name of the attribute, and the value to be written.  The functions
  99  * return the number of bytes written to the buffer.  If the buffer is not big
 100  * enough to hold the string representing the value, then nothing is written
 101  * and 0 is returned.
 102  */
 103 typedef size_t write_func_t(char *, size_t, char *, void *);
 104 
 105 /*
 106  * Translator functions to read from a NULL terminated string buffer into
 107  * something of the given DLADM_TYPE_*.  The functions each return the number
 108  * of bytes read from the string buffer.  If there is an error reading data
 109  * from the buffer, then 0 is returned.  It is the caller's responsibility
 110  * to free the data allocated by these functions.
 111  */
 112 typedef size_t read_func_t(char *, void **);
 113 
 114 typedef struct translator_s {
 115         const char      *type_name;
 116         write_func_t    *write_func;
 117         read_func_t     *read_func;
 118 } translator_t;
 119 
 120 /*
 121  * Translator functions, defined later but declared here so that
 122  * the translator table can be defined.
 123  */
 124 static write_func_t     write_str, write_boolean, write_uint64;
 125 static read_func_t      read_str, read_boolean, read_int64;
 126 
 127 /*
 128  * Translator table, indexed by dladm_datatype_t.
 129  */
 130 static translator_t translators[] = {
 131         { "string",     write_str,      read_str        },
 132         { "boolean",    write_boolean,  read_boolean    },
 133         { "int",        write_uint64,   read_int64      }
 134 };
 135 
 136 static size_t ntranslators = sizeof (translators) / sizeof (translator_t);
 137 
 138 #define LINK_PROPERTY_DELIMINATOR       ";"
 139 #define LINK_PROPERTY_TYPE_VALUE_SEP    ","
 140 #define BASE_PROPERTY_LENGTH(t, n) (strlen(translators[(t)].type_name) +\
 141                                     strlen(LINK_PROPERTY_TYPE_VALUE_SEP) +\
 142                                     strlen(LINK_PROPERTY_DELIMINATOR) +\
 143                                     strlen((n)))
 144 #define GENERATE_PROPERTY_STRING(buf, length, conv, name, type, val) \
 145             (snprintf((buf), (length), "%s=%s%s" conv "%s", (name), \
 146             translators[(type)].type_name, \
 147             LINK_PROPERTY_TYPE_VALUE_SEP, (val), LINK_PROPERTY_DELIMINATOR))
 148 
 149 /*
 150  * Name of the cache file to keep the active <link name, linkid> mapping
 151  */
 152 char    cachefile[MAXPATHLEN];
 153 
 154 #define DLMGMT_PERSISTENT_DB_PATH       "/etc/dladm/datalink.conf"
 155 #define DLMGMT_MAKE_FILE_DB_PATH(buffer, persistent)    \
 156         (void) snprintf((buffer), MAXPATHLEN, "%s", \
 157         (persistent) ? DLMGMT_PERSISTENT_DB_PATH : cachefile);
 158 
 159 typedef struct zopen_arg {
 160         const char      *zopen_modestr;
 161         int             *zopen_pipe;
 162         int             zopen_fd;
 163 } zopen_arg_t;
 164 
 165 typedef struct zrename_arg {
 166         const char      *zrename_newname;
 167 } zrename_arg_t;
 168 
 169 typedef union zfoparg {
 170         zopen_arg_t     zfop_openarg;
 171         zrename_arg_t   zfop_renamearg;
 172 } zfoparg_t;
 173 
 174 typedef struct zfcbarg {
 175         boolean_t       zfarg_inglobalzone; /* is callback in global zone? */
 176         zoneid_t        zfarg_finglobalzone; /* is file in global zone? */
 177         const char      *zfarg_filename;
 178         zfoparg_t       *zfarg_oparg;
 179 } zfarg_t;
 180 #define zfarg_openarg   zfarg_oparg->zfop_openarg
 181 #define zfarg_renamearg zfarg_oparg->zfop_renamearg
 182 
 183 /* zone file callback */
 184 typedef int zfcb_t(zfarg_t *);
 185 
 186 /*
 187  * Execute an operation on filename relative to zoneid's zone root.  If the
 188  * file is in the global zone, then the zfcb() callback will simply be called
 189  * directly.  If the file is in a non-global zone, then zfcb() will be called
 190  * both from the global zone's context, and from the non-global zone's context
 191  * (from a fork()'ed child that has entered the non-global zone).  This is
 192  * done to allow the callback to communicate with itself if needed (e.g. to
 193  * pass back the file descriptor of an opened file).
 194  */
 195 static int
 196 dlmgmt_zfop(const char *filename, zoneid_t zoneid, zfcb_t *zfcb,
 197     zfoparg_t *zfoparg)
 198 {
 199         int             ctfd;
 200         int             err;
 201         pid_t           childpid;
 202         siginfo_t       info;
 203         zfarg_t         zfarg;
 204         ctid_t          ct;
 205 
 206         if (zoneid != GLOBAL_ZONEID) {
 207                 /*
 208                  * We need to access a file that isn't in the global zone.
 209                  * Accessing non-global zone files from the global zone is
 210                  * unsafe (due to symlink attacks), we'll need to fork a child
 211                  * that enters the zone in question and executes the callback
 212                  * that will operate on the file.
 213                  *
 214                  * Before we proceed with this zone tango, we need to create a
 215                  * new process contract for the child, as required by
 216                  * zone_enter().
 217                  */
 218                 errno = 0;
 219                 ctfd = open64("/system/contract/process/template", O_RDWR);
 220                 if (ctfd == -1)
 221                         return (errno);
 222                 if ((err = ct_tmpl_set_critical(ctfd, 0)) != 0 ||
 223                     (err = ct_tmpl_set_informative(ctfd, 0)) != 0 ||
 224                     (err = ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR)) != 0 ||
 225                     (err = ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY)) != 0 ||
 226                     (err = ct_tmpl_activate(ctfd)) != 0) {
 227                         (void) close(ctfd);
 228                         return (err);
 229                 }
 230                 childpid = fork();
 231                 switch (childpid) {
 232                 case -1:
 233                         (void) ct_tmpl_clear(ctfd);
 234                         (void) close(ctfd);
 235                         return (err);
 236                 case 0:
 237                         (void) ct_tmpl_clear(ctfd);
 238                         (void) close(ctfd);
 239                         /*
 240                          * Elevate our privileges as zone_enter() requires all
 241                          * privileges.
 242                          */
 243                         if ((err = dlmgmt_elevate_privileges()) != 0)
 244                                 _exit(err);
 245                         if (zone_enter(zoneid) == -1)
 246                                 _exit(errno);
 247                         if ((err = dlmgmt_drop_privileges()) != 0)
 248                                 _exit(err);
 249                         break;
 250                 default:
 251                         if (contract_latest(&ct) == -1)
 252                                 ct = -1;
 253                         (void) ct_tmpl_clear(ctfd);
 254                         (void) close(ctfd);
 255                         if (waitid(P_PID, childpid, &info, WEXITED) == -1) {
 256                                 (void) contract_abandon_id(ct);
 257                                 return (errno);
 258                         }
 259                         (void) contract_abandon_id(ct);
 260                         if (info.si_status != 0)
 261                                 return (info.si_status);
 262                 }
 263         }
 264 
 265         zfarg.zfarg_inglobalzone = (zoneid == GLOBAL_ZONEID || childpid != 0);
 266         zfarg.zfarg_finglobalzone = (zoneid == GLOBAL_ZONEID);
 267         zfarg.zfarg_filename = filename;
 268         zfarg.zfarg_oparg = zfoparg;
 269         err = zfcb(&zfarg);
 270         if (!zfarg.zfarg_inglobalzone)
 271                 _exit(err);
 272         return (err);
 273 }
 274 
 275 static int
 276 dlmgmt_zopen_cb(zfarg_t *zfarg)
 277 {
 278         struct strrecvfd recvfd;
 279         boolean_t       newfile = B_FALSE;
 280         boolean_t       inglobalzone = zfarg->zfarg_inglobalzone;
 281         zoneid_t        finglobalzone = zfarg->zfarg_finglobalzone;
 282         const char      *filename = zfarg->zfarg_filename;
 283         const char      *modestr = zfarg->zfarg_openarg.zopen_modestr;
 284         int             *p = zfarg->zfarg_openarg.zopen_pipe;
 285         struct stat     statbuf;
 286         int             oflags;
 287         mode_t          mode;
 288         int             fd = -1;
 289         int             err;
 290 
 291         /* We only ever open a file for reading or writing, not both. */
 292         oflags = (modestr[0] == 'r') ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC;
 293         mode = (modestr[0] == 'r') ? 0 : S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 294 
 295         /* Open the file if we're in the same zone as the file. */
 296         if (inglobalzone == finglobalzone) {
 297                 /*
 298                  * First determine if we will be creating the file as part of
 299                  * opening it.  If so, then we'll need to ensure that it has
 300                  * the proper ownership after having opened it.
 301                  */
 302                 if (oflags & O_CREAT) {
 303                         if (stat(filename, &statbuf) == -1) {
 304                                 if (errno == ENOENT)
 305                                         newfile = B_TRUE;
 306                                 else
 307                                         return (errno);
 308                         }
 309                 }
 310                 if ((fd = open(filename, oflags, mode)) == -1)
 311                         return (errno);
 312                 if (newfile) {
 313                         if (chown(filename, UID_DLADM, GID_NETADM) == -1) {
 314                                 err = errno;
 315                                 (void) close(fd);
 316                                 return (err);
 317                         }
 318                 }
 319         }
 320 
 321         /*
 322          * If we're not in the global zone, send the file-descriptor back to
 323          * our parent in the global zone.
 324          */
 325         if (!inglobalzone) {
 326                 assert(!finglobalzone);
 327                 assert(fd != -1);
 328                 return (ioctl(p[1], I_SENDFD, fd) == -1 ? errno : 0);
 329         }
 330 
 331         /*
 332          * At this point, we know we're in the global zone.  If the file was
 333          * in a non-global zone, receive the file-descriptor from our child in
 334          * the non-global zone.
 335          */
 336         if (!finglobalzone) {
 337                 if (ioctl(p[0], I_RECVFD, &recvfd) == -1)
 338                         return (errno);
 339                 fd = recvfd.fd;
 340         }
 341 
 342         zfarg->zfarg_openarg.zopen_fd = fd;
 343         return (0);
 344 }
 345 
 346 static int
 347 dlmgmt_zunlink_cb(zfarg_t *zfarg)
 348 {
 349         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 350                 return (0);
 351         return (unlink(zfarg->zfarg_filename) == 0 ? 0 : errno);
 352 }
 353 
 354 static int
 355 dlmgmt_zrename_cb(zfarg_t *zfarg)
 356 {
 357         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 358                 return (0);
 359         return (rename(zfarg->zfarg_filename,
 360             zfarg->zfarg_renamearg.zrename_newname) == 0 ? 0 : errno);
 361 }
 362 
 363 /*
 364  * Same as fopen(3C), except that it opens the file relative to zoneid's zone
 365  * root.
 366  */
 367 static FILE *
 368 dlmgmt_zfopen(const char *filename, const char *modestr, zoneid_t zoneid,
 369     int *err)
 370 {
 371         int             p[2];
 372         zfoparg_t       zfoparg;
 373         FILE            *fp = NULL;
 374 
 375         if (zoneid != GLOBAL_ZONEID && pipe(p) == -1) {
 376                 *err = errno;
 377                 return (NULL);
 378         }
 379 
 380         zfoparg.zfop_openarg.zopen_modestr = modestr;
 381         zfoparg.zfop_openarg.zopen_pipe = p;
 382         *err = dlmgmt_zfop(filename, zoneid, dlmgmt_zopen_cb, &zfoparg);
 383         if (zoneid != GLOBAL_ZONEID) {
 384                 (void) close(p[0]);
 385                 (void) close(p[1]);
 386         }
 387         if (*err == 0) {
 388                 fp = fdopen(zfoparg.zfop_openarg.zopen_fd, modestr);
 389                 if (fp == NULL) {
 390                         *err = errno;
 391                         (void) close(zfoparg.zfop_openarg.zopen_fd);
 392                 }
 393         }
 394         return (fp);
 395 }
 396 
 397 /*
 398  * Same as rename(2), except that old and new are relative to zoneid's zone
 399  * root.
 400  */
 401 static int
 402 dlmgmt_zrename(const char *old, const char *new, zoneid_t zoneid)
 403 {
 404         zfoparg_t zfoparg;
 405 
 406         zfoparg.zfop_renamearg.zrename_newname = new;
 407         return (dlmgmt_zfop(old, zoneid, dlmgmt_zrename_cb, &zfoparg));
 408 }
 409 
 410 /*
 411  * Same as unlink(2), except that filename is relative to zoneid's zone root.
 412  */
 413 static int
 414 dlmgmt_zunlink(const char *filename, zoneid_t zoneid)
 415 {
 416         return (dlmgmt_zfop(filename, zoneid, dlmgmt_zunlink_cb, NULL));
 417 }
 418 
 419 static size_t
 420 write_str(char *buffer, size_t buffer_length, char *name, void *value)
 421 {
 422         char    *ptr = value;
 423         size_t  data_length = strnlen(ptr, buffer_length);
 424 
 425         /*
 426          * Strings are assumed to be NULL terminated.  In order to fit in
 427          * the buffer, the string's length must be less then buffer_length.
 428          * If the value is empty, there's no point in writing it, in fact,
 429          * we shouldn't even see that case.
 430          */
 431         if (data_length + BASE_PROPERTY_LENGTH(DLADM_TYPE_STR, name) ==
 432             buffer_length || data_length == 0)
 433                 return (0);
 434 
 435         /*
 436          * Since we know the string will fit in the buffer, snprintf will
 437          * always return less than buffer_length, so we can just return
 438          * whatever snprintf returns.
 439          */
 440         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%s",
 441             name, DLADM_TYPE_STR, ptr));
 442 }
 443 
 444 static size_t
 445 write_boolean(char *buffer, size_t buffer_length, char *name, void *value)
 446 {
 447         boolean_t       *ptr = value;
 448 
 449         /*
 450          * Booleans are either zero or one, so we only need room for two
 451          * characters in the buffer.
 452          */
 453         if (buffer_length <= 1 + BASE_PROPERTY_LENGTH(DLADM_TYPE_BOOLEAN, name))
 454                 return (0);
 455 
 456         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%d",
 457             name, DLADM_TYPE_BOOLEAN, *ptr));
 458 }
 459 
 460 static size_t
 461 write_uint64(char *buffer, size_t buffer_length, char *name, void *value)
 462 {
 463         uint64_t        *ptr = value;
 464 
 465         /*
 466          * Limit checking for uint64_t is a little trickier.
 467          */
 468         if (snprintf(NULL, 0, "%lld", *ptr)  +
 469             BASE_PROPERTY_LENGTH(DLADM_TYPE_UINT64, name) >= buffer_length)
 470                 return (0);
 471 
 472         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%lld",
 473             name, DLADM_TYPE_UINT64, *ptr));
 474 }
 475 
 476 static size_t
 477 read_str(char *buffer, void **value)
 478 {
 479         char            *ptr = calloc(MAXLINKATTRVALLEN, sizeof (char));
 480         ssize_t         len;
 481 
 482         if (ptr == NULL || (len = strlcpy(ptr, buffer, MAXLINKATTRVALLEN))
 483             >= MAXLINKATTRVALLEN) {
 484                 free(ptr);
 485                 return (0);
 486         }
 487 
 488         *(char **)value = ptr;
 489 
 490         /* Account for NULL terminator */
 491         return (len + 1);
 492 }
 493 
 494 static size_t
 495 read_boolean(char *buffer, void **value)
 496 {
 497         boolean_t       *ptr = calloc(1, sizeof (boolean_t));
 498 
 499         if (ptr == NULL)
 500                 return (0);
 501 
 502         *ptr = atoi(buffer);
 503         *(boolean_t **)value = ptr;
 504 
 505         return (sizeof (boolean_t));
 506 }
 507 
 508 static size_t
 509 read_int64(char *buffer, void **value)
 510 {
 511         int64_t *ptr = calloc(1, sizeof (int64_t));
 512 
 513         if (ptr == NULL)
 514                 return (0);
 515 
 516         *ptr = (int64_t)atoll(buffer);
 517         *(int64_t **)value = ptr;
 518 
 519         return (sizeof (int64_t));
 520 }
 521 
 522 static dlmgmt_db_req_t *
 523 dlmgmt_db_req_alloc(dlmgmt_db_op_t op, const char *linkname,
 524     datalink_id_t linkid, zoneid_t zoneid, uint32_t flags, int *err)
 525 {
 526         dlmgmt_db_req_t *req;
 527 
 528         if ((req = calloc(1, sizeof (dlmgmt_db_req_t))) == NULL) {
 529                 *err = errno;
 530         } else {
 531                 req->ls_op = op;
 532                 if (linkname != NULL)
 533                         (void) strlcpy(req->ls_link, linkname, MAXLINKNAMELEN);
 534                 req->ls_linkid = linkid;
 535                 req->ls_zoneid = zoneid;
 536                 req->ls_flags = flags;
 537         }
 538         return (req);
 539 }
 540 
 541 /*
 542  * Update the db entry with name "entryname" using information from "linkp".
 543  */
 544 static int
 545 dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
 546     uint32_t flags)
 547 {
 548         dlmgmt_db_req_t *req;
 549         int             err;
 550 
 551         /* It is either a persistent request or an active request, not both. */
 552         assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE));
 553 
 554         if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
 555             linkp->ll_zoneid, flags, &err)) == NULL)
 556                 return (err);
 557 
 558         /* If transient op and onloan, use the global zone cache file. */
 559         if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
 560                 req->ls_zoneid = GLOBAL_ZONEID;
 561 
 562         /*
 563          * If the return error is EINPROGRESS, this request is handled
 564          * asynchronously; return success.
 565          */
 566         err = dlmgmt_process_db_req(req);
 567         if (err != EINPROGRESS)
 568                 free(req);
 569         else
 570                 err = 0;
 571         return (err);
 572 }
 573 
 574 #define DLMGMT_DB_OP_STR(op)                                    \
 575         (((op) == DLMGMT_DB_OP_READ) ? "read" :                 \
 576         (((op) == DLMGMT_DB_OP_WRITE) ? "write" : "delete"))
 577 
 578 #define DLMGMT_DB_CONF_STR(flag)                                \
 579         (((flag) == DLMGMT_ACTIVE) ? "active" :                 \
 580         (((flag) == DLMGMT_PERSIST) ? "persistent" : ""))
 581 
 582 static int
 583 dlmgmt_process_db_req(dlmgmt_db_req_t *req)
 584 {
 585         pthread_t       tid;
 586         boolean_t       writeop;
 587         int             err;
 588 
 589         /*
 590          * If there are already pending "write" requests, queue this request in
 591          * the pending list.  Note that this function is called while the
 592          * dlmgmt_rw_lock is held, so it is safe to access the global variables.
 593          */
 594         writeop = (req->ls_op != DLMGMT_DB_OP_READ);
 595         if (writeop && (req->ls_flags == DLMGMT_PERSIST) &&
 596             (dlmgmt_db_req_head != NULL)) {
 597                 dlmgmt_db_req_tail->ls_next = req;
 598                 dlmgmt_db_req_tail = req;
 599                 return (EINPROGRESS);
 600         }
 601 
 602         err = dlmgmt_process_db_onereq(req, writeop);
 603         if (err != EINPROGRESS && err != 0 && err != ENOENT) {
 604                 /*
 605                  * Log the error unless the request processing is still in
 606                  * progress or if the configuration file hasn't been created
 607                  * yet (ENOENT).
 608                  */
 609                 dlmgmt_log(LOG_WARNING, "dlmgmt_process_db_onereq() %s "
 610                     "operation on %s configuration failed: %s",
 611                     DLMGMT_DB_OP_STR(req->ls_op),
 612                     DLMGMT_DB_CONF_STR(req->ls_flags), strerror(err));
 613         }
 614 
 615         if (err == EINPROGRESS) {
 616                 assert(req->ls_flags == DLMGMT_PERSIST);
 617                 assert(writeop && dlmgmt_db_req_head == NULL);
 618                 dlmgmt_db_req_tail = dlmgmt_db_req_head = req;
 619                 err = pthread_create(&tid, NULL, dlmgmt_db_update_thread, NULL);
 620                 if (err == 0)
 621                         return (EINPROGRESS);
 622         }
 623         return (err);
 624 }
 625 
 626 static int
 627 dlmgmt_process_db_onereq(dlmgmt_db_req_t *req, boolean_t writeop)
 628 {
 629         int     err = 0;
 630         FILE    *fp, *nfp = NULL;
 631         char    file[MAXPATHLEN];
 632         char    newfile[MAXPATHLEN];
 633 
 634         DLMGMT_MAKE_FILE_DB_PATH(file, (req->ls_flags == DLMGMT_PERSIST));
 635         fp = dlmgmt_zfopen(file, "r", req->ls_zoneid, &err);
 636         /*
 637          * Note that it is not an error if the file doesn't exist.  If we're
 638          * reading, we treat this case the same way as an empty file.  If
 639          * we're writing, the file will be created when we open the file for
 640          * writing below.
 641          */
 642         if (fp == NULL && !writeop)
 643                 return (err);
 644 
 645         if (writeop) {
 646                 (void) snprintf(newfile, MAXPATHLEN, "%s.new", file);
 647                 nfp = dlmgmt_zfopen(newfile, "w", req->ls_zoneid, &err);
 648                 if (nfp == NULL) {
 649                         /*
 650                          * EROFS can happen at boot when the file system is
 651                          * read-only.  Return EINPROGRESS so that the caller
 652                          * can add this request to the pending request list
 653                          * and start a retry thread.
 654                          */
 655                         err = (errno == EROFS ? EINPROGRESS : errno);
 656                         goto done;
 657                 }
 658         }
 659         if (writeop) {
 660                 if ((err = process_db_write(req, fp, nfp)) == 0)
 661                         err = dlmgmt_zrename(newfile, file, req->ls_zoneid);
 662         } else {
 663                 err = process_db_read(req, fp);
 664         }
 665 
 666 done:
 667         if (nfp != NULL) {
 668                 (void) fclose(nfp);
 669                 if (err != 0)
 670                         (void) dlmgmt_zunlink(newfile, req->ls_zoneid);
 671         }
 672         (void) fclose(fp);
 673         return (err);
 674 }
 675 
 676 /*ARGSUSED*/
 677 static void *
 678 dlmgmt_db_update_thread(void *arg)
 679 {
 680         dlmgmt_db_req_t *req;
 681 
 682         dlmgmt_table_lock(B_TRUE);
 683 
 684         assert(dlmgmt_db_req_head != NULL);
 685         while ((req = dlmgmt_db_req_head) != NULL) {
 686                 assert(req->ls_flags == DLMGMT_PERSIST);
 687                 if (dlmgmt_process_db_onereq(req, B_TRUE) == EINPROGRESS) {
 688                         /*
 689                          * The filesystem is still read only. Go to sleep and
 690                          * try again.
 691                          */
 692                         dlmgmt_table_unlock();
 693                         (void) sleep(5);
 694                         dlmgmt_table_lock(B_TRUE);
 695                         continue;
 696                 }
 697 
 698                 /*
 699                  * The filesystem is no longer read only. Continue processing
 700                  * and remove the request from the pending list.
 701                  */
 702                 dlmgmt_db_req_head = req->ls_next;
 703                 if (dlmgmt_db_req_tail == req) {
 704                         assert(dlmgmt_db_req_head == NULL);
 705                         dlmgmt_db_req_tail = NULL;
 706                 }
 707                 free(req);
 708         }
 709 
 710         dlmgmt_table_unlock();
 711         return (NULL);
 712 }
 713 
 714 static int
 715 parse_linkprops(char *buf, dlmgmt_link_t *linkp)
 716 {
 717         boolean_t               found_type = B_FALSE;
 718         dladm_datatype_t        type = DLADM_TYPE_STR;
 719         int                     i, len;
 720         char                    *curr;
 721         char                    attr_name[MAXLINKATTRLEN];
 722         size_t                  attr_buf_len = 0;
 723         void                    *attr_buf = NULL;
 724         boolean_t               rename;
 725 
 726         curr = buf;
 727         len = strlen(buf);
 728         attr_name[0] = '\0';
 729         for (i = 0; i < len; i++) {
 730                 char            c = buf[i];
 731                 boolean_t       match = (c == '=' ||
 732                     (c == ',' && !found_type) || c == ';');
 733 
 734                 rename = B_FALSE;
 735 
 736                 /*
 737                  * Move to the next character if there is no match and
 738                  * if we have not reached the last character.
 739                  */
 740                 if (!match && i != len - 1)
 741                         continue;
 742 
 743                 if (match) {
 744                         /*
 745                          * NUL-terminate the string pointed to by 'curr'.
 746                          */
 747                         buf[i] = '\0';
 748                         if (*curr == '\0')
 749                                 goto parse_fail;
 750                 }
 751 
 752                 if (attr_name[0] != '\0' && found_type) {
 753                         /*
 754                          * We get here after we have processed the "<prop>="
 755                          * pattern. The pattern we are now interested in is
 756                          * "<val>;".
 757                          */
 758                         if (c == '=')
 759                                 goto parse_fail;
 760 
 761                         if (strcmp(attr_name, "linkid") == 0) {
 762                                 if (read_int64(curr, &attr_buf) == 0)
 763                                         goto parse_fail;
 764                                 linkp->ll_linkid =
 765                                     (datalink_class_t)*(int64_t *)attr_buf;
 766                         } else if (strcmp(attr_name, "name") == 0) {
 767                                 if (read_str(curr, &attr_buf) == 0)
 768                                         goto parse_fail;
 769                                 (void) snprintf(linkp->ll_link,
 770                                     MAXLINKNAMELEN, "%s", attr_buf);
 771                         } else if (strcmp(attr_name, "class") == 0) {
 772                                 if (read_int64(curr, &attr_buf) == 0)
 773                                         goto parse_fail;
 774                                 linkp->ll_class =
 775                                     (datalink_class_t)*(int64_t *)attr_buf;
 776                         } else if (strcmp(attr_name, "media") == 0) {
 777                                 if (read_int64(curr, &attr_buf) == 0)
 778                                         goto parse_fail;
 779                                 linkp->ll_media =
 780                                     (uint32_t)*(int64_t *)attr_buf;
 781                         } else if (strcmp(attr_name, "zone") == 0) {
 782                                 if (read_str(curr, &attr_buf) == 0)
 783                                         goto parse_fail;
 784                                 linkp->ll_zoneid = getzoneidbyname(attr_buf);
 785                                 if (linkp->ll_zoneid == -1) {
 786                                         if (errno == EFAULT)
 787                                                 abort();
 788                                         /*
 789                                          * If we can't find the zone, assign the
 790                                          * link to the GZ and mark it for being
 791                                          * renamed.
 792                                          */
 793                                         linkp->ll_zoneid = 0;
 794                                         rename = B_TRUE;
 795                                 }
 796                         } else {
 797                                 attr_buf_len = translators[type].read_func(curr,
 798                                     &attr_buf);
 799                                 if (attr_buf_len == 0)
 800                                         goto parse_fail;
 801 
 802                                 if (linkattr_set(&(linkp->ll_head), attr_name,
 803                                     attr_buf, attr_buf_len, type) != 0) {
 804                                         free(attr_buf);
 805                                         goto parse_fail;
 806                                 }
 807                         }
 808 
 809                         free(attr_buf);
 810                         attr_name[0] = '\0';
 811                         found_type = B_FALSE;
 812                 } else if (attr_name[0] != '\0') {
 813                         /*
 814                          * Non-zero length attr_name and found_type of false
 815                          * indicates that we have not found the type for this
 816                          * attribute.  The pattern now is "<type>,<val>;", we
 817                          * want the <type> part of the pattern.
 818                          */
 819                         for (type = 0; type < ntranslators; type++) {
 820                                 if (strcmp(curr,
 821                                     translators[type].type_name) == 0) {
 822                                         found_type = B_TRUE;
 823                                         break;
 824                                 }
 825                         }
 826 
 827                         if (!found_type)
 828                                 goto parse_fail;
 829                 } else {
 830                         /*
 831                          * A zero length attr_name indicates we are looking
 832                          * at the beginning of a link attribute.
 833                          */
 834                         if (c != '=')
 835                                 goto parse_fail;
 836 
 837                         (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
 838                 }
 839 
 840                 /*
 841                  * The zone that this link belongs to has died, we are
 842                  * reparenting it to the GZ and renaming it to avoid name
 843                  * collisions.
 844                  */
 845                 if (rename == B_TRUE) {
 846                         (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
 847                             "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
 848                 }
 849                 curr = buf + i + 1;
 850         }
 851 
 852         /* Correct any erroneous IPTUN datalink class constant in the file */
 853         if (linkp->ll_class == 0x60) {
 854                 linkp->ll_class = DATALINK_CLASS_IPTUN;
 855                 rewrite_needed = B_TRUE;
 856         }
 857 
 858         return (0);
 859 
 860 parse_fail:
 861         /*
 862          * Free linkp->ll_head (link attribute list)
 863          */
 864         linkattr_destroy(linkp);
 865         return (-1);
 866 }
 867 
 868 static boolean_t
 869 process_link_line(char *buf, dlmgmt_link_t *linkp)
 870 {
 871         int     i, len, llen;
 872         char    *str, *lasts;
 873         char    tmpbuf[MAXLINELEN];
 874 
 875         bzero(linkp, sizeof (*linkp));
 876         linkp->ll_linkid = DATALINK_INVALID_LINKID;
 877 
 878         /*
 879          * Use a copy of buf for parsing so that we can do whatever we want.
 880          */
 881         (void) strlcpy(tmpbuf, buf, MAXLINELEN);
 882 
 883         /*
 884          * Skip leading spaces, blank lines, and comments.
 885          */
 886         len = strlen(tmpbuf);
 887         for (i = 0; i < len; i++) {
 888                 if (!isspace(tmpbuf[i]))
 889                         break;
 890         }
 891         if (i == len || tmpbuf[i] == '#')
 892                 return (B_TRUE);
 893 
 894         str = tmpbuf + i;
 895         /*
 896          * Find the link name and assign it to the link structure.
 897          */
 898         if (strtok_r(str, " \n\t", &lasts) == NULL)
 899                 goto fail;
 900 
 901         llen = strlen(str);
 902         /*
 903          * Note that a previous version of the persistent datalink.conf file
 904          * stored the linkid as the first field.  In that case, the name will
 905          * be obtained through parse_linkprops from a property with the format
 906          * "name=<linkname>".  If we encounter such a format, we set
 907          * rewrite_needed so that dlmgmt_db_init() can rewrite the file with
 908          * the new format after it's done reading in the data.
 909          */
 910         if (isdigit(str[0])) {
 911                 linkp->ll_linkid = atoi(str);
 912                 rewrite_needed = B_TRUE;
 913         } else {
 914                 if (strlcpy(linkp->ll_link, str, sizeof (linkp->ll_link)) >=
 915                     sizeof (linkp->ll_link))
 916                         goto fail;
 917         }
 918 
 919         str += llen + 1;
 920         if (str >= tmpbuf + len)
 921                 goto fail;
 922 
 923         /*
 924          * Now find the list of link properties.
 925          */
 926         if ((str = strtok_r(str, " \n\t", &lasts)) == NULL)
 927                 goto fail;
 928 
 929         if (parse_linkprops(str, linkp) < 0)
 930                 goto fail;
 931 
 932         return (B_TRUE);
 933 
 934 fail:
 935         /*
 936          * Delete corrupted line.
 937          */
 938         buf[0] = '\0';
 939         return (B_FALSE);
 940 }
 941 
 942 /*
 943  * Find any properties in linkp that refer to "old", and rename to "new".
 944  * Return B_TRUE if any renaming occurred.
 945  */
 946 static int
 947 dlmgmt_attr_rename(dlmgmt_link_t *linkp, const char *old, const char *new,
 948     boolean_t *renamed)
 949 {
 950         dlmgmt_linkattr_t       *attrp;
 951         char                    *newval = NULL, *pname;
 952         char                    valcp[MAXLINKATTRVALLEN];
 953         size_t                  newsize;
 954 
 955         *renamed = B_FALSE;
 956 
 957         if ((attrp = linkattr_find(linkp->ll_head, "linkover")) != NULL ||
 958             (attrp = linkattr_find(linkp->ll_head, "simnetpeer")) != NULL) {
 959                 if (strcmp(old, (char *)attrp->lp_val) == 0) {
 960                         newsize = strlen(new) + 1;
 961                         if ((newval = malloc(newsize)) == NULL)
 962                                 return (errno);
 963                         (void) strcpy(newval, new);
 964                         free(attrp->lp_val);
 965                         attrp->lp_val = newval;
 966                         attrp->lp_sz = newsize;
 967                         *renamed = B_TRUE;
 968                 }
 969                 return (0);
 970         }
 971 
 972         if ((attrp = linkattr_find(linkp->ll_head, "portnames")) == NULL)
 973                 return (0);
 974 
 975         /* <linkname>:[<linkname>:]... */
 976         if ((newval = calloc(MAXLINKATTRVALLEN, sizeof (char))) == NULL)
 977                 return (errno);
 978 
 979         bcopy(attrp->lp_val, valcp, sizeof (valcp));
 980         pname = strtok(valcp, ":");
 981         while (pname != NULL) {
 982                 if (strcmp(pname, old) == 0) {
 983                         (void) strcat(newval, new);
 984                         *renamed = B_TRUE;
 985                 } else {
 986                         (void) strcat(newval, pname);
 987                 }
 988                 (void) strcat(newval, ":");
 989                 pname = strtok(NULL, ":");
 990         }
 991         if (*renamed) {
 992                 free(attrp->lp_val);
 993                 attrp->lp_val = newval;
 994                 attrp->lp_sz = strlen(newval) + 1;
 995         } else {
 996                 free(newval);
 997         }
 998         return (0);
 999 }
1000 
1001 static int
1002 process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp)
1003 {
1004         boolean_t               done = B_FALSE;
1005         int                     err = 0;
1006         dlmgmt_link_t           link_in_file, *linkp = NULL, *dblinkp;
1007         boolean_t               persist = (req->ls_flags == DLMGMT_PERSIST);
1008         boolean_t               writeall, rename, attr_renamed;
1009         char                    buf[MAXLINELEN];
1010 
1011         writeall = (req->ls_linkid == DATALINK_ALL_LINKID);
1012 
1013         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall) {
1014                 /*
1015                  * find the link in the avl tree with the given linkid.
1016                  */
1017                 linkp = link_by_id(req->ls_linkid, req->ls_zoneid);
1018                 if (linkp == NULL || (linkp->ll_flags & req->ls_flags) == 0) {
1019                         /*
1020                          * This link has already been changed. This could
1021                          * happen if the request is pending because of
1022                          * read-only file-system. If so, we are done.
1023                          */
1024                         return (0);
1025                 }
1026                 /*
1027                  * In the case of a rename, linkp's name has been updated to
1028                  * the new name, and req->ls_link is the old link name.
1029                  */
1030                 rename = (strcmp(req->ls_link, linkp->ll_link) != 0);
1031         }
1032 
1033         /*
1034          * fp can be NULL if the file didn't initially exist and we're
1035          * creating it as part of this write operation.
1036          */
1037         if (fp == NULL)
1038                 goto write;
1039 
1040         while (err == 0 && fgets(buf, sizeof (buf), fp) != NULL &&
1041             process_link_line(buf, &link_in_file)) {
1042                 /*
1043                  * Only the link name is needed. Free the memory allocated for
1044                  * the link attributes list of link_in_file.
1045                  */
1046                 linkattr_destroy(&link_in_file);
1047 
1048                 if (link_in_file.ll_link[0] == '\0' || done) {
1049                         /*
1050                          * this is a comment line or we are done updating the
1051                          * line for the specified link, write the rest of
1052                          * lines out.
1053                          */
1054                         if (fputs(buf, nfp) == EOF)
1055                                 err = errno;
1056                         continue;
1057                 }
1058 
1059                 switch (req->ls_op) {
1060                 case DLMGMT_DB_OP_WRITE:
1061                         /*
1062                          * For write operations, we generate a new output line
1063                          * if we're either writing all links (writeall) or if
1064                          * the name of the link in the file matches the one
1065                          * we're looking for.  Otherwise, we write out the
1066                          * buffer as-is.
1067                          *
1068                          * If we're doing a rename operation, ensure that any
1069                          * references to the link being renamed in link
1070                          * properties are also updated before we write
1071                          * anything.
1072                          */
1073                         if (writeall) {
1074                                 linkp = link_by_name(link_in_file.ll_link,
1075                                     req->ls_zoneid);
1076                         }
1077                         if (writeall || strcmp(req->ls_link,
1078                             link_in_file.ll_link) == 0) {
1079                                 generate_link_line(linkp, persist, buf);
1080                                 if (!writeall && !rename)
1081                                         done = B_TRUE;
1082                         } else if (rename && persist) {
1083                                 dblinkp = link_by_name(link_in_file.ll_link,
1084                                     req->ls_zoneid);
1085                                 err = dlmgmt_attr_rename(dblinkp, req->ls_link,
1086                                     linkp->ll_link, &attr_renamed);
1087                                 if (err != 0)
1088                                         break;
1089                                 if (attr_renamed) {
1090                                         generate_link_line(dblinkp, persist,
1091                                             buf);
1092                                 }
1093                         }
1094                         if (fputs(buf, nfp) == EOF)
1095                                 err = errno;
1096                         break;
1097                 case DLMGMT_DB_OP_DELETE:
1098                         /*
1099                          * Delete is simple.  If buf does not represent the
1100                          * link we're deleting, write it out.
1101                          */
1102                         if (strcmp(req->ls_link, link_in_file.ll_link) != 0) {
1103                                 if (fputs(buf, nfp) == EOF)
1104                                         err = errno;
1105                         } else {
1106                                 done = B_TRUE;
1107                         }
1108                         break;
1109                 case DLMGMT_DB_OP_READ:
1110                 default:
1111                         err = EINVAL;
1112                         break;
1113                 }
1114         }
1115 
1116 write:
1117         /*
1118          * If we get to the end of the file and have not seen what linkid
1119          * points to, write it out then.
1120          */
1121         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall && !rename && !done) {
1122                 generate_link_line(linkp, persist, buf);
1123                 done = B_TRUE;
1124                 if (fputs(buf, nfp) == EOF)
1125                         err = errno;
1126         }
1127 
1128         return (err);
1129 }
1130 
1131 static int
1132 process_db_read(dlmgmt_db_req_t *req, FILE *fp)
1133 {
1134         avl_index_t     name_where, id_where;
1135         dlmgmt_link_t   link_in_file, *newlink, *link_in_db;
1136         char            buf[MAXLINELEN];
1137         int             err = 0;
1138 
1139         /*
1140          * This loop processes each line of the configuration file.
1141          */
1142         while (fgets(buf, MAXLINELEN, fp) != NULL) {
1143                 if (!process_link_line(buf, &link_in_file)) {
1144                         err = EINVAL;
1145                         break;
1146                 }
1147 
1148                 /*
1149                  * Skip the comment line.
1150                  */
1151                 if (link_in_file.ll_link[0] == '\0') {
1152                         linkattr_destroy(&link_in_file);
1153                         continue;
1154                 }
1155 
1156                 if ((req->ls_flags & DLMGMT_ACTIVE) &&
1157                     link_in_file.ll_linkid == DATALINK_INVALID_LINKID) {
1158                         linkattr_destroy(&link_in_file);
1159                         continue;
1160                 }
1161 
1162                 link_in_file.ll_zoneid = req->ls_zoneid;
1163                 link_in_db = link_by_name(link_in_file.ll_link,
1164                     link_in_file.ll_zoneid);
1165                 if (link_in_db != NULL) {
1166                         /*
1167                          * If the link in the database already has the flag
1168                          * for this request set, then the entry is a
1169                          * duplicate.  If it's not a duplicate, then simply
1170                          * turn on the appropriate flag on the existing link.
1171                          */
1172                         if (link_in_db->ll_flags & req->ls_flags) {
1173                                 dlmgmt_log(LOG_WARNING, "Duplicate links "
1174                                     "in the repository: %s",
1175                                     link_in_file.ll_link);
1176                                 linkattr_destroy(&link_in_file);
1177                         } else {
1178                                 if (req->ls_flags & DLMGMT_PERSIST) {
1179                                         /*
1180                                          * Save the newly read properties into
1181                                          * the existing link.
1182                                          */
1183                                         assert(link_in_db->ll_head == NULL);
1184                                         link_in_db->ll_head =
1185                                             link_in_file.ll_head;
1186                                 } else {
1187                                         linkattr_destroy(&link_in_file);
1188                                 }
1189                                 link_in_db->ll_flags |= req->ls_flags;
1190                         }
1191                 } else {
1192                         /*
1193                          * This is a new link.  Allocate a new dlmgmt_link_t
1194                          * and add it to the trees.
1195                          */
1196                         newlink = calloc(1, sizeof (*newlink));
1197                         if (newlink == NULL) {
1198                                 dlmgmt_log(LOG_WARNING, "Unable to allocate "
1199                                     "memory to create new link %s",
1200                                     link_in_file.ll_link);
1201                                 linkattr_destroy(&link_in_file);
1202                                 continue;
1203                         }
1204                         bcopy(&link_in_file, newlink, sizeof (*newlink));
1205 
1206                         if (newlink->ll_linkid == DATALINK_INVALID_LINKID)
1207                                 newlink->ll_linkid = dlmgmt_nextlinkid;
1208                         if (avl_find(&dlmgmt_id_avl, newlink, &id_where) !=
1209                             NULL) {
1210                                 dlmgmt_log(LOG_WARNING, "Link ID %d is already"
1211                                     " in use, destroying link %s",
1212                                     newlink->ll_linkid, newlink->ll_link);
1213                                 link_destroy(newlink);
1214                                 continue;
1215                         }
1216 
1217                         if ((req->ls_flags & DLMGMT_ACTIVE) &&
1218                             link_activate(newlink) != 0) {
1219                                 dlmgmt_log(LOG_WARNING, "Unable to activate %s",
1220                                     newlink->ll_link);
1221                                 link_destroy(newlink);
1222                                 continue;
1223                         }
1224 
1225                         avl_insert(&dlmgmt_id_avl, newlink, id_where);
1226                         /*
1227                          * link_activate call above can insert newlink in
1228                          * dlmgmt_name_avl tree when activating a link that is
1229                          * assigned to a NGZ.
1230                          */
1231                         if (avl_find(&dlmgmt_name_avl, newlink,
1232                             &name_where) == NULL)
1233                                 avl_insert(&dlmgmt_name_avl, newlink,
1234                                     name_where);
1235 
1236                         dlmgmt_advance(newlink);
1237                         newlink->ll_flags |= req->ls_flags;
1238                 }
1239         }
1240 
1241         return (err);
1242 }
1243 
1244 /*
1245  * Generate an entry in the link database.
1246  * Each entry has this format:
1247  * <link name>    <prop0>=<type>,<val>;...;<propn>=<type>,<val>;
1248  */
1249 static void
1250 generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1251 {
1252         char                    tmpbuf[MAXLINELEN];
1253         char                    *ptr = tmpbuf;
1254         char                    *lim = tmpbuf + MAXLINELEN;
1255         dlmgmt_linkattr_t       *cur_p = NULL;
1256         uint64_t                u64;
1257 
1258         ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1259         if (!persist) {
1260                 char zname[ZONENAME_MAX];
1261                 /*
1262                  * We store the linkid and the zone name in the active database
1263                  * so that dlmgmtd can recover in the event that it is
1264                  * restarted.
1265                  */
1266                 u64 = linkp->ll_linkid;
1267                 ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
1268 
1269                 if (getzonenamebyid(linkp->ll_zoneid, zname,
1270                     sizeof (zname)) != -1) {
1271                         ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
1272                 }
1273         }
1274         u64 = linkp->ll_class;
1275         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1276         u64 = linkp->ll_media;
1277         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1278 
1279         /*
1280          * The daemon does not keep any active link attribute. Only store the
1281          * attributes if this request is for persistent configuration,
1282          */
1283         if (persist) {
1284                 for (cur_p = linkp->ll_head; cur_p != NULL;
1285                     cur_p = cur_p->lp_next) {
1286                         ptr += translators[cur_p->lp_type].write_func(ptr,
1287                             BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val);
1288                 }
1289         }
1290 
1291         if (ptr <= lim)
1292                 (void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf);
1293 }
1294 
1295 int
1296 dlmgmt_delete_db_entry(dlmgmt_link_t *linkp, uint32_t flags)
1297 {
1298         return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkp->ll_link, linkp,
1299             flags));
1300 }
1301 
1302 int
1303 dlmgmt_write_db_entry(const char *entryname, dlmgmt_link_t *linkp,
1304     uint32_t flags)
1305 {
1306         int err;
1307 
1308         if (flags & DLMGMT_PERSIST) {
1309                 if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1310                     linkp, DLMGMT_PERSIST)) != 0) {
1311                         return (err);
1312                 }
1313         }
1314 
1315         if (flags & DLMGMT_ACTIVE) {
1316                 if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1317                     linkp, DLMGMT_ACTIVE)) != 0) && (flags & DLMGMT_PERSIST)) {
1318                         (void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, entryname,
1319                             linkp, DLMGMT_PERSIST);
1320                         return (err);
1321                 }
1322         }
1323 
1324         return (0);
1325 }
1326 
1327 /*
1328  * Upgrade properties that have link IDs as values to link names.  Because '.'
1329  * is a valid linkname character, the port separater for link aggregations
1330  * must be changed to ':'.
1331  */
1332 static void
1333 linkattr_upgrade(dlmgmt_linkattr_t *attrp)
1334 {
1335         datalink_id_t   linkid;
1336         char            *portidstr;
1337         char            portname[MAXLINKNAMELEN + 1];
1338         dlmgmt_link_t   *linkp;
1339         char            *new_attr_val;
1340         size_t          new_attr_sz;
1341         boolean_t       upgraded = B_FALSE;
1342 
1343         if (strcmp(attrp->lp_name, "linkover") == 0 ||
1344             strcmp(attrp->lp_name, "simnetpeer") == 0) {
1345                 if (attrp->lp_type == DLADM_TYPE_UINT64) {
1346                         linkid = (datalink_id_t)*(uint64_t *)attrp->lp_val;
1347                         if ((linkp = link_by_id(linkid, GLOBAL_ZONEID)) == NULL)
1348                                 return;
1349                         new_attr_sz = strlen(linkp->ll_link) + 1;
1350                         if ((new_attr_val = malloc(new_attr_sz)) == NULL)
1351                                 return;
1352                         (void) strcpy(new_attr_val, linkp->ll_link);
1353                         upgraded = B_TRUE;
1354                 }
1355         } else if (strcmp(attrp->lp_name, "portnames") == 0) {
1356                 /*
1357                  * The old format for "portnames" was
1358                  * "<linkid>.[<linkid>.]...".  The new format is
1359                  * "<linkname>:[<linkname>:]...".
1360                  */
1361                 if (!isdigit(((char *)attrp->lp_val)[0]))
1362                         return;
1363                 new_attr_val = calloc(MAXLINKATTRVALLEN, sizeof (char));
1364                 if (new_attr_val == NULL)
1365                         return;
1366                 portidstr = (char *)attrp->lp_val;
1367                 while (*portidstr != '\0') {
1368                         errno = 0;
1369                         linkid = strtol(portidstr, &portidstr, 10);
1370                         if (linkid == 0 || *portidstr != '.' ||
1371                             (linkp = link_by_id(linkid, GLOBAL_ZONEID)) ==
1372                             NULL) {
1373                                 free(new_attr_val);
1374                                 return;
1375                         }
1376                         (void) snprintf(portname, sizeof (portname), "%s:",
1377                             linkp->ll_link);
1378                         if (strlcat(new_attr_val, portname,
1379                             MAXLINKATTRVALLEN) >= MAXLINKATTRVALLEN) {
1380                                 free(new_attr_val);
1381                                 return;
1382                         }
1383                         /* skip the '.' delimiter */
1384                         portidstr++;
1385                 }
1386                 new_attr_sz = strlen(new_attr_val) + 1;
1387                 upgraded = B_TRUE;
1388         }
1389 
1390         if (upgraded) {
1391                 attrp->lp_type = DLADM_TYPE_STR;
1392                 attrp->lp_sz = new_attr_sz;
1393                 free(attrp->lp_val);
1394                 attrp->lp_val = new_attr_val;
1395         }
1396 }
1397 
1398 static void
1399 dlmgmt_db_upgrade(dlmgmt_link_t *linkp)
1400 {
1401         dlmgmt_linkattr_t *attrp;
1402 
1403         for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next)
1404                 linkattr_upgrade(attrp);
1405 }
1406 
1407 static void
1408 dlmgmt_db_phys_activate(dlmgmt_link_t *linkp)
1409 {
1410         linkp->ll_flags |= DLMGMT_ACTIVE;
1411         (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_ACTIVE);
1412 }
1413 
1414 static void
1415 dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
1416 {
1417         dlmgmt_link_t *linkp;
1418 
1419         for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1420             linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1421                 if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1422                         func(linkp);
1423         }
1424 }
1425 
1426 /*
1427  * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
1428  *
1429  * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
1430  * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
1431  * Because of the upcall architecture of dlmgmtd this can lead to deadlock
1432  * with the following scenario:
1433  *    a) the thread preparing to fork will have acquired the malloc locks
1434  *       then attempt to suspend every thread in preparation to fork.
1435  *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
1436  *       and get the credentials of their caller.
1437  *    c) we can't suspend the in-kernel thread making the upcall.
1438  *
1439  * Thus, we cannot serve door requests because we're blocked in malloc()
1440  * which fork() owns, but fork() is in turn blocked on the in-kernel thread
1441  * making the door upcall.  This is a fundamental architectural problem with
1442  * any server handling upcalls and also trying to fork().
1443  *
1444  * To minimize the chance of this deadlock occuring, we check ahead of time to
1445  * see if the file we want to read actually exists in the zone (which it almost
1446  * never does), so we don't need fork in that case (i.e. rarely to never).
1447  */
1448 static boolean_t
1449 zone_file_exists(char *zoneroot, char *filename)
1450 {
1451         struct stat     sb;
1452         char            fname[MAXPATHLEN];
1453 
1454         (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
1455 
1456         if (stat(fname, &sb) == -1)
1457                 return (B_FALSE);
1458 
1459         return (B_TRUE);
1460 }
1461 
1462 /*
1463  * Initialize the datalink <link name, linkid> mapping and the link's
1464  * attributes list based on the configuration file /etc/dladm/datalink.conf
1465  * and the active configuration cache file
1466  * /etc/svc/volatile/dladm/datalink-management:default.cache.
1467  */
1468 int
1469 dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1470 {
1471         dlmgmt_db_req_t *req;
1472         int             err;
1473         boolean_t       boot = B_FALSE;
1474 
1475         if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1476             DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1477                 return (err);
1478 
1479         /* Handle running in a non-native branded zone (i.e. has /native) */
1480         if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
1481                 char tdir[MAXPATHLEN];
1482 
1483                 (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
1484                 (void) strlcpy(cachefile, tdir, sizeof (cachefile));
1485         }
1486 
1487         if (zone_file_exists(zoneroot, cachefile)) {
1488                 if ((err = dlmgmt_process_db_req(req)) != 0) {
1489                         /*
1490                          * If we get back ENOENT, that means that the active
1491                          * configuration file doesn't exist yet, and is not an
1492                          * error.  We'll create it down below after we've
1493                          * loaded the persistent configuration.
1494                          */
1495                         if (err != ENOENT)
1496                                 goto done;
1497                         boot = B_TRUE;
1498                 }
1499         } else {
1500                 boot = B_TRUE;
1501         }
1502 
1503         if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
1504                 req->ls_flags = DLMGMT_PERSIST;
1505                 err = dlmgmt_process_db_req(req);
1506                 if (err != 0 && err != ENOENT)
1507                         goto done;
1508         }
1509         err = 0;
1510         if (rewrite_needed) {
1511                 /*
1512                  * First update links in memory, then dump the entire db to
1513                  * disk.
1514                  */
1515                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade);
1516                 req->ls_op = DLMGMT_DB_OP_WRITE;
1517                 req->ls_linkid = DATALINK_ALL_LINKID;
1518                 if ((err = dlmgmt_process_db_req(req)) != 0 &&
1519                     err != EINPROGRESS)
1520                         goto done;
1521         }
1522         if (boot) {
1523                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_PHYS,
1524                     dlmgmt_db_phys_activate);
1525         }
1526 
1527 done:
1528         if (err == EINPROGRESS)
1529                 err = 0;
1530         else
1531                 free(req);
1532         return (err);
1533 }
1534 
1535 /*
1536  * Remove all links in the given zoneid.
1537  *
1538  * We do this work in two different passes. In the first pass, we remove any
1539  * entry that hasn't been loaned and mark every entry that has been loaned as
1540  * something that is going to be tombstomed. In the second pass, we drop the
1541  * table lock for every entry and remove the tombstombed entry for our zone.
1542  */
1543 void
1544 dlmgmt_db_fini(zoneid_t zoneid)
1545 {
1546         dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1547 
1548         while (linkp != NULL) {
1549                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1550                 if (linkp->ll_zoneid == zoneid) {
1551                         boolean_t onloan = linkp->ll_onloan;
1552 
1553                         /*
1554                          * Cleanup any VNICs that were loaned to the zone
1555                          * before the zone goes away and we can no longer
1556                          * refer to the VNIC by the name/zoneid.
1557                          */
1558                         if (onloan) {
1559                                 (void) dlmgmt_delete_db_entry(linkp,
1560                                     DLMGMT_ACTIVE);
1561                                 linkp->ll_tomb = B_TRUE;
1562                         } else {
1563                                 (void) dlmgmt_destroy_common(linkp,
1564                                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1565                         }
1566 
1567                 }
1568                 linkp = next_linkp;
1569         }
1570 
1571 again:
1572         linkp = avl_first(&dlmgmt_name_avl);
1573         while (linkp != NULL) {
1574                 vnic_ioc_delete_t ioc;
1575 
1576                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1577 
1578                 if (linkp->ll_zoneid != zoneid) {
1579                         linkp = next_linkp;
1580                         continue;
1581                 }
1582                 ioc.vd_vnic_id = linkp->ll_linkid;
1583                 if (linkp->ll_tomb != B_TRUE)
1584                         abort();
1585 
1586                 /*
1587                  * We have to drop the table lock while going up into the
1588                  * kernel. If we hold the table lock while deleting a vnic, we
1589                  * may get blocked on the mac perimeter and the holder of it may
1590                  * want something from dlmgmtd.
1591                  */
1592                 dlmgmt_table_unlock();
1593 
1594                 if (ioctl(dladm_dld_fd(dld_handle),
1595                     VNIC_IOC_DELETE, &ioc) < 0)
1596                         dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
1597                             "delete VNIC ioctl failed %d %d",
1598                             ioc.vd_vnic_id, errno);
1599 
1600                 /*
1601                  * Even though we've dropped the lock, we know that nothing else
1602                  * could have removed us. Therefore, it should be safe to go
1603                  * through and delete ourselves, but do nothing else. We'll have
1604                  * to restart iteration from the beginning. This can be painful.
1605                  */
1606                 dlmgmt_table_lock(B_TRUE);
1607 
1608                 (void) dlmgmt_destroy_common(linkp,
1609                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1610                 goto again;
1611         }
1612 
1613 }