1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2015, Joyent Inc.
  25  */
  26 
  27 #include <assert.h>
  28 #include <ctype.h>
  29 #include <errno.h>
  30 #include <fcntl.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <strings.h>
  35 #include <syslog.h>
  36 #include <zone.h>
  37 #include <sys/types.h>
  38 #include <sys/stat.h>
  39 #include <stropts.h>
  40 #include <sys/conf.h>
  41 #include <pthread.h>
  42 #include <unistd.h>
  43 #include <wait.h>
  44 #include <libcontract.h>
  45 #include <libcontract_priv.h>
  46 #include <sys/contract/process.h>
  47 #include <sys/vnic.h>
  48 #include <zone.h>
  49 #include "dlmgmt_impl.h"
  50 
/*
 * Kind of operation a database request performs.  Everything other than
 * DLMGMT_DB_OP_READ is treated as a write operation by
 * dlmgmt_process_db_req().
 */
typedef enum dlmgmt_db_op {
        DLMGMT_DB_OP_WRITE,
        DLMGMT_DB_OP_DELETE,
        DLMGMT_DB_OP_READ
} dlmgmt_db_op_t;
  56 
/*
 * A single request against one of the link databases.  Requests that cannot
 * be completed yet are chained through ls_next on the pending list.
 */
typedef struct dlmgmt_db_req_s {
        struct dlmgmt_db_req_s  *ls_next;       /* next pending request */
        dlmgmt_db_op_t          ls_op;          /* operation to perform */
        char                    ls_link[MAXLINKNAMELEN]; /* db entry name */
        datalink_id_t           ls_linkid;
        zoneid_t                ls_zoneid;      /* zone the db file is */
                                                /* opened relative to  */
        uint32_t                ls_flags;       /* Either DLMGMT_ACTIVE or   */
                                                /* DLMGMT_PERSIST, not both. */
} dlmgmt_db_req_t;
  66 
/*
 * List of pending db updates (e.g., because of a read-only filesystem).
 * dlmgmt_process_db_req() appends at the tail; dlmgmt_db_update_thread()
 * consumes from the head.  Both pointers are NULL when nothing is pending.
 */
static dlmgmt_db_req_t  *dlmgmt_db_req_head = NULL;
static dlmgmt_db_req_t  *dlmgmt_db_req_tail = NULL;
  72 
  73 /*
  74  * rewrite_needed is set to B_TRUE by process_link_line() if it encounters a
  75  * line with an old format.  This will cause the file being read to be
  76  * re-written with the current format.
  77  */
  78 static boolean_t        rewrite_needed;
  79 
  80 static int              dlmgmt_db_update(dlmgmt_db_op_t, const char *,
  81                             dlmgmt_link_t *, uint32_t);
  82 static int              dlmgmt_process_db_req(dlmgmt_db_req_t *);
  83 static int              dlmgmt_process_db_onereq(dlmgmt_db_req_t *, boolean_t);
  84 static void             *dlmgmt_db_update_thread(void *);
  85 static boolean_t        process_link_line(char *, dlmgmt_link_t *);
  86 static int              process_db_write(dlmgmt_db_req_t *, FILE *, FILE *);
  87 static int              process_db_read(dlmgmt_db_req_t *, FILE *);
  88 static void             generate_link_line(dlmgmt_link_t *, boolean_t, char *);
  89 
/*
 * Number of bytes between ptr and lim, or 0 once ptr has reached or passed
 * lim.
 */
#define BUFLEN(lim, ptr)        (((lim) > (ptr)) ? ((lim) - (ptr)) : 0)
/* NOTE(review): presumably the maximum length of one db line -- confirm. */
#define MAXLINELEN              1024

/* Function type that takes a single link and returns nothing. */
typedef void db_walk_func_t(dlmgmt_link_t *);
  94 
  95 /*
  96  * Translator functions to go from dladm_datatype_t to character strings.
  97  * Each function takes a pointer to a buffer, the size of the buffer,
  98  * the name of the attribute, and the value to be written.  The functions
  99  * return the number of bytes written to the buffer.  If the buffer is not big
 100  * enough to hold the string representing the value, then nothing is written
 101  * and 0 is returned.
 102  */
 103 typedef size_t write_func_t(char *, size_t, char *, void *);
 104 
/*
 * Translator functions to read from a NUL-terminated string buffer into
 * a newly allocated value of the given DLADM_TYPE_*.  Each function returns
 * the size in bytes of the value it stored through its second argument (for
 * strings, the string length plus its NUL terminator).  If there is an error
 * reading data from the buffer, then 0 is returned.  It is the caller's
 * responsibility to free the data allocated by these functions.
 */
 112 typedef size_t read_func_t(char *, void **);
 113 
/*
 * One entry of the translator table: the name of the type as it is spelled
 * in a generated property string, plus the functions that convert a value of
 * that type to and from its textual form.
 */
typedef struct translator_s {
        const char      *type_name;
        write_func_t    *write_func;
        read_func_t     *read_func;
} translator_t;
 119 
 120 /*
 121  * Translator functions, defined later but declared here so that
 122  * the translator table can be defined.
 123  */
 124 static write_func_t     write_str, write_boolean, write_uint64;
 125 static read_func_t      read_str, read_boolean, read_int64;
 126 
/*
 * Translator table, indexed by dladm_datatype_t.  Because entries are looked
 * up by type value (e.g. translators[DLADM_TYPE_STR]), their order must
 * follow the order of the dladm_datatype_t values.
 */
static translator_t translators[] = {
        { "string",     write_str,      read_str        },
        { "boolean",    write_boolean,  read_boolean    },
        { "int",        write_uint64,   read_int64      }
};

/* Number of entries in translators[]. */
static size_t ntranslators = sizeof (translators) / sizeof (translator_t);
 137 
 138 #define LINK_PROPERTY_DELIMINATOR       ";"
 139 #define LINK_PROPERTY_TYPE_VALUE_SEP    ","
 140 #define BASE_PROPERTY_LENGTH(t, n) (strlen(translators[(t)].type_name) +\
 141                                     strlen(LINK_PROPERTY_TYPE_VALUE_SEP) +\
 142                                     strlen(LINK_PROPERTY_DELIMINATOR) +\
 143                                     strlen((n)))
 144 #define GENERATE_PROPERTY_STRING(buf, length, conv, name, type, val) \
 145             (snprintf((buf), (length), "%s=%s%s" conv "%s", (name), \
 146             translators[(type)].type_name, \
 147             LINK_PROPERTY_TYPE_VALUE_SEP, (val), LINK_PROPERTY_DELIMINATOR))
 148 
/*
 * Name of the cache file to keep the active <link name, linkid> mapping
 */
char    cachefile[MAXPATHLEN];

/* Path of the persistent link database. */
#define DLMGMT_PERSISTENT_DB_PATH       "/etc/dladm/datalink.conf"
/*
 * Fill buffer (at most MAXPATHLEN bytes are written) with the path of the
 * persistent database if persistent is true, or with cachefile otherwise.
 * Note that the expansion already ends in a semicolon.
 */
#define DLMGMT_MAKE_FILE_DB_PATH(buffer, persistent)    \
        (void) snprintf((buffer), MAXPATHLEN, "%s", \
        (persistent) ? DLMGMT_PERSISTENT_DB_PATH : cachefile);
 158 
/* Arguments and result of an open operation (see dlmgmt_zopen_cb()). */
typedef struct zopen_arg {
        const char      *zopen_modestr; /* fopen(3C)-style mode string */
        int             *zopen_pipe;    /* pipe used to pass the fd between */
                                        /* the child and the parent */
        int             zopen_fd;       /* out: the opened file descriptor */
} zopen_arg_t;

/* Arguments of a rename operation (see dlmgmt_zrename_cb()). */
typedef struct zrename_arg {
        const char      *zrename_newname;
} zrename_arg_t;

/* Operation-specific arguments handed to a zone file callback. */
typedef union zfoparg {
        zopen_arg_t     zfop_openarg;
        zrename_arg_t   zfop_renamearg;
} zfoparg_t;
 173 
 174 typedef struct zfcbarg {
 175         boolean_t       zfarg_inglobalzone; /* is callback in global zone? */
 176         zoneid_t        zfarg_finglobalzone; /* is file in global zone? */
 177         const char      *zfarg_filename;
 178         zfoparg_t       *zfarg_oparg;
 179 } zfarg_t;
 180 #define zfarg_openarg   zfarg_oparg->zfop_openarg
 181 #define zfarg_renamearg zfarg_oparg->zfop_renamearg
 182 
/*
 * Zone file callback: performs its part of a file operation described by the
 * zfarg_t and returns 0 on success or an error number on failure.
 */
typedef int zfcb_t(zfarg_t *);
 185 
 186 /*
 187  * Execute an operation on filename relative to zoneid's zone root.  If the
 188  * file is in the global zone, then the zfcb() callback will simply be called
 189  * directly.  If the file is in a non-global zone, then zfcb() will be called
 190  * both from the global zone's context, and from the non-global zone's context
 191  * (from a fork()'ed child that has entered the non-global zone).  This is
 192  * done to allow the callback to communicate with itself if needed (e.g. to
 193  * pass back the file descriptor of an opened file).
 194  */
 195 static int
 196 dlmgmt_zfop(const char *filename, zoneid_t zoneid, zfcb_t *zfcb,
 197     zfoparg_t *zfoparg)
 198 {
 199         int             ctfd;
 200         int             err;
 201         pid_t           childpid;
 202         siginfo_t       info;
 203         zfarg_t         zfarg;
 204         ctid_t          ct;
 205 
 206         if (zoneid != GLOBAL_ZONEID) {
 207                 /*
 208                  * We need to access a file that isn't in the global zone.
 209                  * Accessing non-global zone files from the global zone is
 210                  * unsafe (due to symlink attacks), we'll need to fork a child
 211                  * that enters the zone in question and executes the callback
 212                  * that will operate on the file.
 213                  *
 214                  * Before we proceed with this zone tango, we need to create a
 215                  * new process contract for the child, as required by
 216                  * zone_enter().
 217                  */
 218                 errno = 0;
 219                 ctfd = open64("/system/contract/process/template", O_RDWR);
 220                 if (ctfd == -1)
 221                         return (errno);
 222                 if ((err = ct_tmpl_set_critical(ctfd, 0)) != 0 ||
 223                     (err = ct_tmpl_set_informative(ctfd, 0)) != 0 ||
 224                     (err = ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR)) != 0 ||
 225                     (err = ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY)) != 0 ||
 226                     (err = ct_tmpl_activate(ctfd)) != 0) {
 227                         (void) close(ctfd);
 228                         return (err);
 229                 }
 230                 childpid = fork();
 231                 switch (childpid) {
 232                 case -1:
 233                         (void) ct_tmpl_clear(ctfd);
 234                         (void) close(ctfd);
 235                         return (err);
 236                 case 0:
 237                         (void) ct_tmpl_clear(ctfd);
 238                         (void) close(ctfd);
 239                         /*
 240                          * Elevate our privileges as zone_enter() requires all
 241                          * privileges.
 242                          */
 243                         if ((err = dlmgmt_elevate_privileges()) != 0)
 244                                 _exit(err);
 245                         if (zone_enter(zoneid) == -1)
 246                                 _exit(errno);
 247                         if ((err = dlmgmt_drop_privileges()) != 0)
 248                                 _exit(err);
 249                         break;
 250                 default:
 251                         if (contract_latest(&ct) == -1)
 252                                 ct = -1;
 253                         (void) ct_tmpl_clear(ctfd);
 254                         (void) close(ctfd);
 255                         if (waitid(P_PID, childpid, &info, WEXITED) == -1) {
 256                                 (void) contract_abandon_id(ct);
 257                                 return (errno);
 258                         }
 259                         (void) contract_abandon_id(ct);
 260                         if (info.si_status != 0)
 261                                 return (info.si_status);
 262                 }
 263         }
 264 
 265         zfarg.zfarg_inglobalzone = (zoneid == GLOBAL_ZONEID || childpid != 0);
 266         zfarg.zfarg_finglobalzone = (zoneid == GLOBAL_ZONEID);
 267         zfarg.zfarg_filename = filename;
 268         zfarg.zfarg_oparg = zfoparg;
 269         err = zfcb(&zfarg);
 270         if (!zfarg.zfarg_inglobalzone)
 271                 _exit(err);
 272         return (err);
 273 }
 274 
 275 static int
 276 dlmgmt_zopen_cb(zfarg_t *zfarg)
 277 {
 278         struct strrecvfd recvfd;
 279         boolean_t       newfile = B_FALSE;
 280         boolean_t       inglobalzone = zfarg->zfarg_inglobalzone;
 281         zoneid_t        finglobalzone = zfarg->zfarg_finglobalzone;
 282         const char      *filename = zfarg->zfarg_filename;
 283         const char      *modestr = zfarg->zfarg_openarg.zopen_modestr;
 284         int             *p = zfarg->zfarg_openarg.zopen_pipe;
 285         struct stat     statbuf;
 286         int             oflags;
 287         mode_t          mode;
 288         int             fd = -1;
 289         int             err;
 290 
 291         /* We only ever open a file for reading or writing, not both. */
 292         oflags = (modestr[0] == 'r') ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC;
 293         mode = (modestr[0] == 'r') ? 0 : S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 294 
 295         /* Open the file if we're in the same zone as the file. */
 296         if (inglobalzone == finglobalzone) {
 297                 /*
 298                  * First determine if we will be creating the file as part of
 299                  * opening it.  If so, then we'll need to ensure that it has
 300                  * the proper ownership after having opened it.
 301                  */
 302                 if (oflags & O_CREAT) {
 303                         if (stat(filename, &statbuf) == -1) {
 304                                 if (errno == ENOENT)
 305                                         newfile = B_TRUE;
 306                                 else
 307                                         return (errno);
 308                         }
 309                 }
 310                 if ((fd = open(filename, oflags, mode)) == -1)
 311                         return (errno);
 312                 if (newfile) {
 313                         if (chown(filename, UID_DLADM, GID_NETADM) == -1) {
 314                                 err = errno;
 315                                 (void) close(fd);
 316                                 return (err);
 317                         }
 318                 }
 319         }
 320 
 321         /*
 322          * If we're not in the global zone, send the file-descriptor back to
 323          * our parent in the global zone.
 324          */
 325         if (!inglobalzone) {
 326                 assert(!finglobalzone);
 327                 assert(fd != -1);
 328                 return (ioctl(p[1], I_SENDFD, fd) == -1 ? errno : 0);
 329         }
 330 
 331         /*
 332          * At this point, we know we're in the global zone.  If the file was
 333          * in a non-global zone, receive the file-descriptor from our child in
 334          * the non-global zone.
 335          */
 336         if (!finglobalzone) {
 337                 if (ioctl(p[0], I_RECVFD, &recvfd) == -1)
 338                         return (errno);
 339                 fd = recvfd.fd;
 340         }
 341 
 342         zfarg->zfarg_openarg.zopen_fd = fd;
 343         return (0);
 344 }
 345 
 346 static int
 347 dlmgmt_zunlink_cb(zfarg_t *zfarg)
 348 {
 349         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 350                 return (0);
 351         return (unlink(zfarg->zfarg_filename) == 0 ? 0 : errno);
 352 }
 353 
 354 static int
 355 dlmgmt_zrename_cb(zfarg_t *zfarg)
 356 {
 357         if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
 358                 return (0);
 359         return (rename(zfarg->zfarg_filename,
 360             zfarg->zfarg_renamearg.zrename_newname) == 0 ? 0 : errno);
 361 }
 362 
 363 /*
 364  * Same as fopen(3C), except that it opens the file relative to zoneid's zone
 365  * root.
 366  */
 367 static FILE *
 368 dlmgmt_zfopen(const char *filename, const char *modestr, zoneid_t zoneid,
 369     int *err)
 370 {
 371         int             p[2];
 372         zfoparg_t       zfoparg;
 373         FILE            *fp = NULL;
 374 
 375         if (zoneid != GLOBAL_ZONEID && pipe(p) == -1) {
 376                 *err = errno;
 377                 return (NULL);
 378         }
 379 
 380         zfoparg.zfop_openarg.zopen_modestr = modestr;
 381         zfoparg.zfop_openarg.zopen_pipe = p;
 382         *err = dlmgmt_zfop(filename, zoneid, dlmgmt_zopen_cb, &zfoparg);
 383         if (zoneid != GLOBAL_ZONEID) {
 384                 (void) close(p[0]);
 385                 (void) close(p[1]);
 386         }
 387         if (*err == 0) {
 388                 fp = fdopen(zfoparg.zfop_openarg.zopen_fd, modestr);
 389                 if (fp == NULL) {
 390                         *err = errno;
 391                         (void) close(zfoparg.zfop_openarg.zopen_fd);
 392                 }
 393         }
 394         return (fp);
 395 }
 396 
 397 /*
 398  * Same as rename(2), except that old and new are relative to zoneid's zone
 399  * root.
 400  */
 401 static int
 402 dlmgmt_zrename(const char *old, const char *new, zoneid_t zoneid)
 403 {
 404         zfoparg_t zfoparg;
 405 
 406         zfoparg.zfop_renamearg.zrename_newname = new;
 407         return (dlmgmt_zfop(old, zoneid, dlmgmt_zrename_cb, &zfoparg));
 408 }
 409 
 410 /*
 411  * Same as unlink(2), except that filename is relative to zoneid's zone root.
 412  */
 413 static int
 414 dlmgmt_zunlink(const char *filename, zoneid_t zoneid)
 415 {
 416         return (dlmgmt_zfop(filename, zoneid, dlmgmt_zunlink_cb, NULL));
 417 }
 418 
 419 static size_t
 420 write_str(char *buffer, size_t buffer_length, char *name, void *value)
 421 {
 422         char    *ptr = value;
 423         size_t  data_length = strnlen(ptr, buffer_length);
 424 
 425         /*
 426          * Strings are assumed to be NULL terminated.  In order to fit in
 427          * the buffer, the string's length must be less then buffer_length.
 428          * If the value is empty, there's no point in writing it, in fact,
 429          * we shouldn't even see that case.
 430          */
 431         if (data_length + BASE_PROPERTY_LENGTH(DLADM_TYPE_STR, name) ==
 432             buffer_length || data_length == 0)
 433                 return (0);
 434 
 435         /*
 436          * Since we know the string will fit in the buffer, snprintf will
 437          * always return less than buffer_length, so we can just return
 438          * whatever snprintf returns.
 439          */
 440         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%s",
 441             name, DLADM_TYPE_STR, ptr));
 442 }
 443 
 444 static size_t
 445 write_boolean(char *buffer, size_t buffer_length, char *name, void *value)
 446 {
 447         boolean_t       *ptr = value;
 448 
 449         /*
 450          * Booleans are either zero or one, so we only need room for two
 451          * characters in the buffer.
 452          */
 453         if (buffer_length <= 1 + BASE_PROPERTY_LENGTH(DLADM_TYPE_BOOLEAN, name))
 454                 return (0);
 455 
 456         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%d",
 457             name, DLADM_TYPE_BOOLEAN, *ptr));
 458 }
 459 
 460 static size_t
 461 write_uint64(char *buffer, size_t buffer_length, char *name, void *value)
 462 {
 463         uint64_t        *ptr = value;
 464 
 465         /*
 466          * Limit checking for uint64_t is a little trickier.
 467          */
 468         if (snprintf(NULL, 0, "%lld", *ptr)  +
 469             BASE_PROPERTY_LENGTH(DLADM_TYPE_UINT64, name) >= buffer_length)
 470                 return (0);
 471 
 472         return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%lld",
 473             name, DLADM_TYPE_UINT64, *ptr));
 474 }
 475 
 476 static size_t
 477 read_str(char *buffer, void **value)
 478 {
 479         char            *ptr = calloc(MAXLINKATTRVALLEN, sizeof (char));
 480         ssize_t         len;
 481 
 482         if (ptr == NULL || (len = strlcpy(ptr, buffer, MAXLINKATTRVALLEN))
 483             >= MAXLINKATTRVALLEN) {
 484                 free(ptr);
 485                 return (0);
 486         }
 487 
 488         *(char **)value = ptr;
 489 
 490         /* Account for NULL terminator */
 491         return (len + 1);
 492 }
 493 
 494 static size_t
 495 read_boolean(char *buffer, void **value)
 496 {
 497         boolean_t       *ptr = calloc(1, sizeof (boolean_t));
 498 
 499         if (ptr == NULL)
 500                 return (0);
 501 
 502         *ptr = atoi(buffer);
 503         *(boolean_t **)value = ptr;
 504 
 505         return (sizeof (boolean_t));
 506 }
 507 
 508 static size_t
 509 read_int64(char *buffer, void **value)
 510 {
 511         int64_t *ptr = calloc(1, sizeof (int64_t));
 512 
 513         if (ptr == NULL)
 514                 return (0);
 515 
 516         *ptr = (int64_t)atoll(buffer);
 517         *(int64_t **)value = ptr;
 518 
 519         return (sizeof (int64_t));
 520 }
 521 
 522 static dlmgmt_db_req_t *
 523 dlmgmt_db_req_alloc(dlmgmt_db_op_t op, const char *linkname,
 524     datalink_id_t linkid, zoneid_t zoneid, uint32_t flags, int *err)
 525 {
 526         dlmgmt_db_req_t *req;
 527 
 528         if ((req = calloc(1, sizeof (dlmgmt_db_req_t))) == NULL) {
 529                 *err = errno;
 530         } else {
 531                 req->ls_op = op;
 532                 if (linkname != NULL)
 533                         (void) strlcpy(req->ls_link, linkname, MAXLINKNAMELEN);
 534                 req->ls_linkid = linkid;
 535                 req->ls_zoneid = zoneid;
 536                 req->ls_flags = flags;
 537         }
 538         return (req);
 539 }
 540 
 541 /*
 542  * Update the db entry with name "entryname" using information from "linkp".
 543  */
 544 static int
 545 dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
 546     uint32_t flags)
 547 {
 548         dlmgmt_db_req_t *req;
 549         int             err;
 550 
 551         /* It is either a persistent request or an active request, not both. */
 552         assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE));
 553 
 554         if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
 555             linkp->ll_zoneid, flags, &err)) == NULL)
 556                 return (err);
 557 
 558         /*
 559          * If the return error is EINPROGRESS, this request is handled
 560          * asynchronously; return success.
 561          */
 562         err = dlmgmt_process_db_req(req);
 563         if (err != EINPROGRESS)
 564                 free(req);
 565         else
 566                 err = 0;
 567         return (err);
 568 }
 569 
/*
 * Name of a dlmgmt_db_op_t for log messages.  Any value other than
 * DLMGMT_DB_OP_READ and DLMGMT_DB_OP_WRITE is reported as "delete".
 */
#define DLMGMT_DB_OP_STR(op)                                    \
        (((op) == DLMGMT_DB_OP_READ) ? "read" :                 \
        (((op) == DLMGMT_DB_OP_WRITE) ? "write" : "delete"))

/*
 * Name of the configuration selected by a request's flags for log messages;
 * the empty string if flag is neither DLMGMT_ACTIVE nor DLMGMT_PERSIST.
 */
#define DLMGMT_DB_CONF_STR(flag)                                \
        (((flag) == DLMGMT_ACTIVE) ? "active" :                 \
        (((flag) == DLMGMT_PERSIST) ? "persistent" : ""))
 577 
 578 static int
 579 dlmgmt_process_db_req(dlmgmt_db_req_t *req)
 580 {
 581         pthread_t       tid;
 582         boolean_t       writeop;
 583         int             err;
 584 
 585         /*
 586          * If there are already pending "write" requests, queue this request in
 587          * the pending list.  Note that this function is called while the
 588          * dlmgmt_rw_lock is held, so it is safe to access the global variables.
 589          */
 590         writeop = (req->ls_op != DLMGMT_DB_OP_READ);
 591         if (writeop && (req->ls_flags == DLMGMT_PERSIST) &&
 592             (dlmgmt_db_req_head != NULL)) {
 593                 dlmgmt_db_req_tail->ls_next = req;
 594                 dlmgmt_db_req_tail = req;
 595                 return (EINPROGRESS);
 596         }
 597 
 598         err = dlmgmt_process_db_onereq(req, writeop);
 599         if (err != EINPROGRESS && err != 0 && err != ENOENT) {
 600                 /*
 601                  * Log the error unless the request processing is still in
 602                  * progress or if the configuration file hasn't been created
 603                  * yet (ENOENT).
 604                  */
 605                 dlmgmt_log(LOG_WARNING, "dlmgmt_process_db_onereq() %s "
 606                     "operation on %s configuration failed: %s",
 607                     DLMGMT_DB_OP_STR(req->ls_op),
 608                     DLMGMT_DB_CONF_STR(req->ls_flags), strerror(err));
 609         }
 610 
 611         if (err == EINPROGRESS) {
 612                 assert(req->ls_flags == DLMGMT_PERSIST);
 613                 assert(writeop && dlmgmt_db_req_head == NULL);
 614                 dlmgmt_db_req_tail = dlmgmt_db_req_head = req;
 615                 err = pthread_create(&tid, NULL, dlmgmt_db_update_thread, NULL);
 616                 if (err == 0)
 617                         return (EINPROGRESS);
 618         }
 619         return (err);
 620 }
 621 
 622 static int
 623 dlmgmt_process_db_onereq(dlmgmt_db_req_t *req, boolean_t writeop)
 624 {
 625         int     err = 0;
 626         FILE    *fp, *nfp = NULL;
 627         char    file[MAXPATHLEN];
 628         char    newfile[MAXPATHLEN];
 629 
 630         DLMGMT_MAKE_FILE_DB_PATH(file, (req->ls_flags == DLMGMT_PERSIST));
 631         fp = dlmgmt_zfopen(file, "r", req->ls_zoneid, &err);
 632         /*
 633          * Note that it is not an error if the file doesn't exist.  If we're
 634          * reading, we treat this case the same way as an empty file.  If
 635          * we're writing, the file will be created when we open the file for
 636          * writing below.
 637          */
 638         if (fp == NULL && !writeop)
 639                 return (err);
 640 
 641         if (writeop) {
 642                 (void) snprintf(newfile, MAXPATHLEN, "%s.new", file);
 643                 nfp = dlmgmt_zfopen(newfile, "w", req->ls_zoneid, &err);
 644                 if (nfp == NULL) {
 645                         /*
 646                          * EROFS can happen at boot when the file system is
 647                          * read-only.  Return EINPROGRESS so that the caller
 648                          * can add this request to the pending request list
 649                          * and start a retry thread.
 650                          */
 651                         err = (errno == EROFS ? EINPROGRESS : errno);
 652                         goto done;
 653                 }
 654         }
 655         if (writeop) {
 656                 if ((err = process_db_write(req, fp, nfp)) == 0)
 657                         err = dlmgmt_zrename(newfile, file, req->ls_zoneid);
 658         } else {
 659                 err = process_db_read(req, fp);
 660         }
 661 
 662 done:
 663         if (nfp != NULL) {
 664                 (void) fclose(nfp);
 665                 if (err != 0)
 666                         (void) dlmgmt_zunlink(newfile, req->ls_zoneid);
 667         }
 668         (void) fclose(fp);
 669         return (err);
 670 }
 671 
 672 /*ARGSUSED*/
 673 static void *
 674 dlmgmt_db_update_thread(void *arg)
 675 {
 676         dlmgmt_db_req_t *req;
 677 
 678         dlmgmt_table_lock(B_TRUE);
 679 
 680         assert(dlmgmt_db_req_head != NULL);
 681         while ((req = dlmgmt_db_req_head) != NULL) {
 682                 assert(req->ls_flags == DLMGMT_PERSIST);
 683                 if (dlmgmt_process_db_onereq(req, B_TRUE) == EINPROGRESS) {
 684                         /*
 685                          * The filesystem is still read only. Go to sleep and
 686                          * try again.
 687                          */
 688                         dlmgmt_table_unlock();
 689                         (void) sleep(5);
 690                         dlmgmt_table_lock(B_TRUE);
 691                         continue;
 692                 }
 693 
 694                 /*
 695                  * The filesystem is no longer read only. Continue processing
 696                  * and remove the request from the pending list.
 697                  */
 698                 dlmgmt_db_req_head = req->ls_next;
 699                 if (dlmgmt_db_req_tail == req) {
 700                         assert(dlmgmt_db_req_head == NULL);
 701                         dlmgmt_db_req_tail = NULL;
 702                 }
 703                 free(req);
 704         }
 705 
 706         dlmgmt_table_unlock();
 707         return (NULL);
 708 }
 709 
 710 static int
 711 parse_linkprops(char *buf, dlmgmt_link_t *linkp)
 712 {
 713         boolean_t               found_type = B_FALSE;
 714         dladm_datatype_t        type = DLADM_TYPE_STR;
 715         int                     i, len;
 716         char                    *curr;
 717         char                    attr_name[MAXLINKATTRLEN];
 718         size_t                  attr_buf_len = 0;
 719         void                    *attr_buf = NULL;
 720         boolean_t               rename;
 721 
 722         curr = buf;
 723         len = strlen(buf);
 724         attr_name[0] = '\0';
 725         for (i = 0; i < len; i++) {
 726                 char            c = buf[i];
 727                 boolean_t       match = (c == '=' ||
 728                     (c == ',' && !found_type) || c == ';');
 729 
 730                 rename = B_FALSE;
 731                 /*
 732                  * Move to the next character if there is no match and
 733                  * if we have not reached the last character.
 734                  */
 735                 if (!match && i != len - 1)
 736                         continue;
 737 
 738                 if (match) {
 739                         /*
 740                          * NUL-terminate the string pointed to by 'curr'.
 741                          */
 742                         buf[i] = '\0';
 743                         if (*curr == '\0')
 744                                 goto parse_fail;
 745                 }
 746 
 747                 if (attr_name[0] != '\0' && found_type) {
 748                         /*
 749                          * We get here after we have processed the "<prop>="
 750                          * pattern. The pattern we are now interested in is
 751                          * "<val>;".
 752                          */
 753                         if (c == '=')
 754                                 goto parse_fail;
 755 
 756                         if (strcmp(attr_name, "linkid") == 0) {
 757                                 if (read_int64(curr, &attr_buf) == 0)
 758                                         goto parse_fail;
 759                                 linkp->ll_linkid =
 760                                     (datalink_class_t)*(int64_t *)attr_buf;
 761                         } else if (strcmp(attr_name, "name") == 0) {
 762                                 if (read_str(curr, &attr_buf) == 0)
 763                                         goto parse_fail;
 764                                 (void) snprintf(linkp->ll_link,
 765                                     MAXLINKNAMELEN, "%s", attr_buf);
 766                         } else if (strcmp(attr_name, "class") == 0) {
 767                                 if (read_int64(curr, &attr_buf) == 0)
 768                                         goto parse_fail;
 769                                 linkp->ll_class =
 770                                     (datalink_class_t)*(int64_t *)attr_buf;
 771                         } else if (strcmp(attr_name, "media") == 0) {
 772                                 if (read_int64(curr, &attr_buf) == 0)
 773                                         goto parse_fail;
 774                                 linkp->ll_media =
 775                                     (uint32_t)*(int64_t *)attr_buf;
 776                         } else if (strcmp(attr_name, "zone") == 0) {
 777                                 if (read_str(curr, &attr_buf) == 0)
 778                                         goto parse_fail;
 779                                 linkp->ll_zoneid = getzoneidbyname(attr_buf);
 780                                 if (linkp->ll_zoneid == -1) {
 781                                         if (errno == EFAULT)
 782                                                 abort();
 783                                         /*
 784                                          * If we can't find the zone, assign the
 785                                          * link to the GZ and mark it for being
 786                                          * renamed.
 787                                          */
 788                                         linkp->ll_zoneid = 0;
 789                                         rename = B_TRUE;
 790                                 }
 791                         } else {
 792                                 attr_buf_len = translators[type].read_func(curr,
 793                                     &attr_buf);
 794                                 if (attr_buf_len == 0)
 795                                         goto parse_fail;
 796 
 797                                 if (linkattr_set(&(linkp->ll_head), attr_name,
 798                                     attr_buf, attr_buf_len, type) != 0) {
 799                                         free(attr_buf);
 800                                         goto parse_fail;
 801                                 }
 802                         }
 803 
 804                         free(attr_buf);
 805                         attr_name[0] = '\0';
 806                         found_type = B_FALSE;
 807                 } else if (attr_name[0] != '\0') {
 808                         /*
 809                          * Non-zero length attr_name and found_type of false
 810                          * indicates that we have not found the type for this
 811                          * attribute.  The pattern now is "<type>,<val>;", we
 812                          * want the <type> part of the pattern.
 813                          */
 814                         for (type = 0; type < ntranslators; type++) {
 815                                 if (strcmp(curr,
 816                                     translators[type].type_name) == 0) {
 817                                         found_type = B_TRUE;
 818                                         break;
 819                                 }
 820                         }
 821 
 822                         if (!found_type)
 823                                 goto parse_fail;
 824                 } else {
 825                         /*
 826                          * A zero length attr_name indicates we are looking
 827                          * at the beginning of a link attribute.
 828                          */
 829                         if (c != '=')
 830                                 goto parse_fail;
 831 
 832                         (void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
 833                 }
 834 
 835                 /*
 836                  * The zone that this link belongs to has died, we are
 837                  * reparenting it to the GZ and renaming it to avoid name
 838                  * collisions.
 839                  */
 840                 if (rename == B_TRUE) {
 841                         (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
 842                             "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
 843                 }
 844                 curr = buf + i + 1;
 845         }
 846 
 847         /* Correct any erroneous IPTUN datalink class constant in the file */
 848         if (linkp->ll_class == 0x60) {
 849                 linkp->ll_class = DATALINK_CLASS_IPTUN;
 850                 rewrite_needed = B_TRUE;
 851         }
 852 
 853         return (0);
 854 
 855 parse_fail:
 856         /*
 857          * Free linkp->ll_head (link attribute list)
 858          */
 859         linkattr_destroy(linkp);
 860         return (-1);
 861 }
 862 
 863 static boolean_t
 864 process_link_line(char *buf, dlmgmt_link_t *linkp)
 865 {
 866         int     i, len, llen;
 867         char    *str, *lasts;
 868         char    tmpbuf[MAXLINELEN];
 869 
 870         bzero(linkp, sizeof (*linkp));
 871         linkp->ll_linkid = DATALINK_INVALID_LINKID;
 872 
 873         /*
 874          * Use a copy of buf for parsing so that we can do whatever we want.
 875          */
 876         (void) strlcpy(tmpbuf, buf, MAXLINELEN);
 877 
 878         /*
 879          * Skip leading spaces, blank lines, and comments.
 880          */
 881         len = strlen(tmpbuf);
 882         for (i = 0; i < len; i++) {
 883                 if (!isspace(tmpbuf[i]))
 884                         break;
 885         }
 886         if (i == len || tmpbuf[i] == '#')
 887                 return (B_TRUE);
 888 
 889         str = tmpbuf + i;
 890         /*
 891          * Find the link name and assign it to the link structure.
 892          */
 893         if (strtok_r(str, " \n\t", &lasts) == NULL)
 894                 goto fail;
 895 
 896         llen = strlen(str);
 897         /*
 898          * Note that a previous version of the persistent datalink.conf file
 899          * stored the linkid as the first field.  In that case, the name will
 900          * be obtained through parse_linkprops from a property with the format
 901          * "name=<linkname>".  If we encounter such a format, we set
 902          * rewrite_needed so that dlmgmt_db_init() can rewrite the file with
 903          * the new format after it's done reading in the data.
 904          */
 905         if (isdigit(str[0])) {
 906                 linkp->ll_linkid = atoi(str);
 907                 rewrite_needed = B_TRUE;
 908         } else {
 909                 if (strlcpy(linkp->ll_link, str, sizeof (linkp->ll_link)) >=
 910                     sizeof (linkp->ll_link))
 911                         goto fail;
 912         }
 913 
 914         str += llen + 1;
 915         if (str >= tmpbuf + len)
 916                 goto fail;
 917 
 918         /*
 919          * Now find the list of link properties.
 920          */
 921         if ((str = strtok_r(str, " \n\t", &lasts)) == NULL)
 922                 goto fail;
 923 
 924         if (parse_linkprops(str, linkp) < 0)
 925                 goto fail;
 926 
 927         return (B_TRUE);
 928 
 929 fail:
 930         /*
 931          * Delete corrupted line.
 932          */
 933         buf[0] = '\0';
 934         return (B_FALSE);
 935 }
 936 
 937 /*
 938  * Find any properties in linkp that refer to "old", and rename to "new".
 939  * Return B_TRUE if any renaming occurred.
 940  */
 941 static int
 942 dlmgmt_attr_rename(dlmgmt_link_t *linkp, const char *old, const char *new,
 943     boolean_t *renamed)
 944 {
 945         dlmgmt_linkattr_t       *attrp;
 946         char                    *newval = NULL, *pname;
 947         char                    valcp[MAXLINKATTRVALLEN];
 948         size_t                  newsize;
 949 
 950         *renamed = B_FALSE;
 951 
 952         if ((attrp = linkattr_find(linkp->ll_head, "linkover")) != NULL ||
 953             (attrp = linkattr_find(linkp->ll_head, "simnetpeer")) != NULL) {
 954                 if (strcmp(old, (char *)attrp->lp_val) == 0) {
 955                         newsize = strlen(new) + 1;
 956                         if ((newval = malloc(newsize)) == NULL)
 957                                 return (errno);
 958                         (void) strcpy(newval, new);
 959                         free(attrp->lp_val);
 960                         attrp->lp_val = newval;
 961                         attrp->lp_sz = newsize;
 962                         *renamed = B_TRUE;
 963                 }
 964                 return (0);
 965         }
 966 
 967         if ((attrp = linkattr_find(linkp->ll_head, "portnames")) == NULL)
 968                 return (0);
 969 
 970         /* <linkname>:[<linkname>:]... */
 971         if ((newval = calloc(MAXLINKATTRVALLEN, sizeof (char))) == NULL)
 972                 return (errno);
 973 
 974         bcopy(attrp->lp_val, valcp, sizeof (valcp));
 975         pname = strtok(valcp, ":");
 976         while (pname != NULL) {
 977                 if (strcmp(pname, old) == 0) {
 978                         (void) strcat(newval, new);
 979                         *renamed = B_TRUE;
 980                 } else {
 981                         (void) strcat(newval, pname);
 982                 }
 983                 (void) strcat(newval, ":");
 984                 pname = strtok(NULL, ":");
 985         }
 986         if (*renamed) {
 987                 free(attrp->lp_val);
 988                 attrp->lp_val = newval;
 989                 attrp->lp_sz = strlen(newval) + 1;
 990         } else {
 991                 free(newval);
 992         }
 993         return (0);
 994 }
 995 
 996 static int
 997 process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp)
 998 {
 999         boolean_t               done = B_FALSE;
1000         int                     err = 0;
1001         dlmgmt_link_t           link_in_file, *linkp = NULL, *dblinkp;
1002         boolean_t               persist = (req->ls_flags == DLMGMT_PERSIST);
1003         boolean_t               writeall, rename, attr_renamed;
1004         char                    buf[MAXLINELEN];
1005 
1006         writeall = (req->ls_linkid == DATALINK_ALL_LINKID);
1007 
1008         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall) {
1009                 /*
1010                  * find the link in the avl tree with the given linkid.
1011                  */
1012                 linkp = link_by_id(req->ls_linkid, req->ls_zoneid);
1013                 if (linkp == NULL || (linkp->ll_flags & req->ls_flags) == 0) {
1014                         /*
1015                          * This link has already been changed. This could
1016                          * happen if the request is pending because of
1017                          * read-only file-system. If so, we are done.
1018                          */
1019                         return (0);
1020                 }
1021                 /*
1022                  * In the case of a rename, linkp's name has been updated to
1023                  * the new name, and req->ls_link is the old link name.
1024                  */
1025                 rename = (strcmp(req->ls_link, linkp->ll_link) != 0);
1026         }
1027 
1028         /*
1029          * fp can be NULL if the file didn't initially exist and we're
1030          * creating it as part of this write operation.
1031          */
1032         if (fp == NULL)
1033                 goto write;
1034 
1035         while (err == 0 && fgets(buf, sizeof (buf), fp) != NULL &&
1036             process_link_line(buf, &link_in_file)) {
1037                 /*
1038                  * Only the link name is needed. Free the memory allocated for
1039                  * the link attributes list of link_in_file.
1040                  */
1041                 linkattr_destroy(&link_in_file);
1042 
1043                 if (link_in_file.ll_link[0] == '\0' || done) {
1044                         /*
1045                          * this is a comment line or we are done updating the
1046                          * line for the specified link, write the rest of
1047                          * lines out.
1048                          */
1049                         if (fputs(buf, nfp) == EOF)
1050                                 err = errno;
1051                         continue;
1052                 }
1053 
1054                 switch (req->ls_op) {
1055                 case DLMGMT_DB_OP_WRITE:
1056                         /*
1057                          * For write operations, we generate a new output line
1058                          * if we're either writing all links (writeall) or if
1059                          * the name of the link in the file matches the one
1060                          * we're looking for.  Otherwise, we write out the
1061                          * buffer as-is.
1062                          *
1063                          * If we're doing a rename operation, ensure that any
1064                          * references to the link being renamed in link
1065                          * properties are also updated before we write
1066                          * anything.
1067                          */
1068                         if (writeall) {
1069                                 linkp = link_by_name(link_in_file.ll_link,
1070                                     req->ls_zoneid);
1071                         }
1072                         if (writeall || strcmp(req->ls_link,
1073                             link_in_file.ll_link) == 0) {
1074                                 generate_link_line(linkp, persist, buf);
1075                                 if (!writeall && !rename)
1076                                         done = B_TRUE;
1077                         } else if (rename && persist) {
1078                                 dblinkp = link_by_name(link_in_file.ll_link,
1079                                     req->ls_zoneid);
1080                                 err = dlmgmt_attr_rename(dblinkp, req->ls_link,
1081                                     linkp->ll_link, &attr_renamed);
1082                                 if (err != 0)
1083                                         break;
1084                                 if (attr_renamed) {
1085                                         generate_link_line(dblinkp, persist,
1086                                             buf);
1087                                 }
1088                         }
1089                         if (fputs(buf, nfp) == EOF)
1090                                 err = errno;
1091                         break;
1092                 case DLMGMT_DB_OP_DELETE:
1093                         /*
1094                          * Delete is simple.  If buf does not represent the
1095                          * link we're deleting, write it out.
1096                          */
1097                         if (strcmp(req->ls_link, link_in_file.ll_link) != 0) {
1098                                 if (fputs(buf, nfp) == EOF)
1099                                         err = errno;
1100                         } else {
1101                                 done = B_TRUE;
1102                         }
1103                         break;
1104                 case DLMGMT_DB_OP_READ:
1105                 default:
1106                         err = EINVAL;
1107                         break;
1108                 }
1109         }
1110 
1111 write:
1112         /*
1113          * If we get to the end of the file and have not seen what linkid
1114          * points to, write it out then.
1115          */
1116         if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall && !rename && !done) {
1117                 generate_link_line(linkp, persist, buf);
1118                 done = B_TRUE;
1119                 if (fputs(buf, nfp) == EOF)
1120                         err = errno;
1121         }
1122 
1123         return (err);
1124 }
1125 
1126 static int
1127 process_db_read(dlmgmt_db_req_t *req, FILE *fp)
1128 {
1129         avl_index_t     name_where, id_where;
1130         dlmgmt_link_t   link_in_file, *newlink, *link_in_db;
1131         char            buf[MAXLINELEN];
1132         int             err = 0;
1133 
1134         /*
1135          * This loop processes each line of the configuration file.
1136          */
1137         while (fgets(buf, MAXLINELEN, fp) != NULL) {
1138                 if (!process_link_line(buf, &link_in_file)) {
1139                         err = EINVAL;
1140                         break;
1141                 }
1142 
1143                 /*
1144                  * Skip the comment line.
1145                  */
1146                 if (link_in_file.ll_link[0] == '\0') {
1147                         linkattr_destroy(&link_in_file);
1148                         continue;
1149                 }
1150 
1151                 if ((req->ls_flags & DLMGMT_ACTIVE) &&
1152                     link_in_file.ll_linkid == DATALINK_INVALID_LINKID) {
1153                         linkattr_destroy(&link_in_file);
1154                         continue;
1155                 }
1156 
1157                 link_in_file.ll_zoneid = req->ls_zoneid;
1158                 link_in_db = link_by_name(link_in_file.ll_link,
1159                     link_in_file.ll_zoneid);
1160                 if (link_in_db != NULL) {
1161                         /*
1162                          * If the link in the database already has the flag
1163                          * for this request set, then the entry is a
1164                          * duplicate.  If it's not a duplicate, then simply
1165                          * turn on the appropriate flag on the existing link.
1166                          */
1167                         if (link_in_db->ll_flags & req->ls_flags) {
1168                                 dlmgmt_log(LOG_WARNING, "Duplicate links "
1169                                     "in the repository: %s",
1170                                     link_in_file.ll_link);
1171                                 linkattr_destroy(&link_in_file);
1172                         } else {
1173                                 if (req->ls_flags & DLMGMT_PERSIST) {
1174                                         /*
1175                                          * Save the newly read properties into
1176                                          * the existing link.
1177                                          */
1178                                         assert(link_in_db->ll_head == NULL);
1179                                         link_in_db->ll_head =
1180                                             link_in_file.ll_head;
1181                                 } else {
1182                                         linkattr_destroy(&link_in_file);
1183                                 }
1184                                 link_in_db->ll_flags |= req->ls_flags;
1185                         }
1186                 } else {
1187                         /*
1188                          * This is a new link.  Allocate a new dlmgmt_link_t
1189                          * and add it to the trees.
1190                          */
1191                         newlink = calloc(1, sizeof (*newlink));
1192                         if (newlink == NULL) {
1193                                 dlmgmt_log(LOG_WARNING, "Unable to allocate "
1194                                     "memory to create new link %s",
1195                                     link_in_file.ll_link);
1196                                 linkattr_destroy(&link_in_file);
1197                                 continue;
1198                         }
1199                         bcopy(&link_in_file, newlink, sizeof (*newlink));
1200 
1201                         if (newlink->ll_linkid == DATALINK_INVALID_LINKID)
1202                                 newlink->ll_linkid = dlmgmt_nextlinkid;
1203                         if (avl_find(&dlmgmt_id_avl, newlink, &id_where) !=
1204                             NULL) {
1205                                 dlmgmt_log(LOG_WARNING, "Link ID %d is already"
1206                                     " in use, destroying link %s",
1207                                     newlink->ll_linkid, newlink->ll_link);
1208                                 link_destroy(newlink);
1209                                 continue;
1210                         }
1211 
1212                         if ((req->ls_flags & DLMGMT_ACTIVE) &&
1213                             link_activate(newlink) != 0) {
1214                                 dlmgmt_log(LOG_WARNING, "Unable to activate %s",
1215                                     newlink->ll_link);
1216                                 link_destroy(newlink);
1217                                 continue;
1218                         }
1219 
1220                         avl_insert(&dlmgmt_id_avl, newlink, id_where);
1221                         /*
1222                          * link_activate call above can insert newlink in
1223                          * dlmgmt_name_avl tree when activating a link that is
1224                          * assigned to a NGZ.
1225                          */
1226                         if (avl_find(&dlmgmt_name_avl, newlink,
1227                             &name_where) == NULL)
1228                                 avl_insert(&dlmgmt_name_avl, newlink,
1229                                     name_where);
1230 
1231                         dlmgmt_advance(newlink);
1232                         newlink->ll_flags |= req->ls_flags;
1233                 }
1234         }
1235 
1236         return (err);
1237 }
1238 
1239 /*
1240  * Generate an entry in the link database.
1241  * Each entry has this format:
1242  * <link name>    <prop0>=<type>,<val>;...;<propn>=<type>,<val>;
1243  */
1244 static void
1245 generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1246 {
1247         char                    tmpbuf[MAXLINELEN];
1248         char                    *ptr = tmpbuf;
1249         char                    *lim = tmpbuf + MAXLINELEN;
1250         dlmgmt_linkattr_t       *cur_p = NULL;
1251         uint64_t                u64;
1252 
1253         ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1254         if (!persist) {
1255                 char zname[ZONENAME_MAX];
1256                 /*
1257                  * We store the linkid and the zone name in the active database
1258                  * so that dlmgmtd can recover in the event that it is
1259                  * restarted.
1260                  */
1261                 u64 = linkp->ll_linkid;
1262                 ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
1263 
1264                 if (getzonenamebyid(linkp->ll_zoneid, zname,
1265                     sizeof (zname)) != -1) {
1266                         ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
1267                 }
1268         }
1269         u64 = linkp->ll_class;
1270         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1271         u64 = linkp->ll_media;
1272         ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1273 
1274         /*
1275          * The daemon does not keep any active link attribute. Only store the
1276          * attributes if this request is for persistent configuration,
1277          */
1278         if (persist) {
1279                 for (cur_p = linkp->ll_head; cur_p != NULL;
1280                     cur_p = cur_p->lp_next) {
1281                         ptr += translators[cur_p->lp_type].write_func(ptr,
1282                             BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val);
1283                 }
1284         }
1285 
1286         if (ptr <= lim)
1287                 (void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf);
1288 }
1289 
1290 int
1291 dlmgmt_delete_db_entry(dlmgmt_link_t *linkp, uint32_t flags)
1292 {
1293         return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkp->ll_link, linkp,
1294             flags));
1295 }
1296 
1297 int
1298 dlmgmt_write_db_entry(const char *entryname, dlmgmt_link_t *linkp,
1299     uint32_t flags)
1300 {
1301         int err;
1302 
1303         if (flags & DLMGMT_PERSIST) {
1304                 if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1305                     linkp, DLMGMT_PERSIST)) != 0) {
1306                         return (err);
1307                 }
1308         }
1309 
1310         if (flags & DLMGMT_ACTIVE) {
1311                 if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1312                     linkp, DLMGMT_ACTIVE)) != 0) && (flags & DLMGMT_PERSIST)) {
1313                         (void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, entryname,
1314                             linkp, DLMGMT_PERSIST);
1315                         return (err);
1316                 }
1317         }
1318 
1319         return (0);
1320 }
1321 
1322 /*
1323  * Upgrade properties that have link IDs as values to link names.  Because '.'
1324  * is a valid linkname character, the port separater for link aggregations
1325  * must be changed to ':'.
1326  */
1327 static void
1328 linkattr_upgrade(dlmgmt_linkattr_t *attrp)
1329 {
1330         datalink_id_t   linkid;
1331         char            *portidstr;
1332         char            portname[MAXLINKNAMELEN + 1];
1333         dlmgmt_link_t   *linkp;
1334         char            *new_attr_val;
1335         size_t          new_attr_sz;
1336         boolean_t       upgraded = B_FALSE;
1337 
1338         if (strcmp(attrp->lp_name, "linkover") == 0 ||
1339             strcmp(attrp->lp_name, "simnetpeer") == 0) {
1340                 if (attrp->lp_type == DLADM_TYPE_UINT64) {
1341                         linkid = (datalink_id_t)*(uint64_t *)attrp->lp_val;
1342                         if ((linkp = link_by_id(linkid, GLOBAL_ZONEID)) == NULL)
1343                                 return;
1344                         new_attr_sz = strlen(linkp->ll_link) + 1;
1345                         if ((new_attr_val = malloc(new_attr_sz)) == NULL)
1346                                 return;
1347                         (void) strcpy(new_attr_val, linkp->ll_link);
1348                         upgraded = B_TRUE;
1349                 }
1350         } else if (strcmp(attrp->lp_name, "portnames") == 0) {
1351                 /*
1352                  * The old format for "portnames" was
1353                  * "<linkid>.[<linkid>.]...".  The new format is
1354                  * "<linkname>:[<linkname>:]...".
1355                  */
1356                 if (!isdigit(((char *)attrp->lp_val)[0]))
1357                         return;
1358                 new_attr_val = calloc(MAXLINKATTRVALLEN, sizeof (char));
1359                 if (new_attr_val == NULL)
1360                         return;
1361                 portidstr = (char *)attrp->lp_val;
1362                 while (*portidstr != '\0') {
1363                         errno = 0;
1364                         linkid = strtol(portidstr, &portidstr, 10);
1365                         if (linkid == 0 || *portidstr != '.' ||
1366                             (linkp = link_by_id(linkid, GLOBAL_ZONEID)) ==
1367                             NULL) {
1368                                 free(new_attr_val);
1369                                 return;
1370                         }
1371                         (void) snprintf(portname, sizeof (portname), "%s:",
1372                             linkp->ll_link);
1373                         if (strlcat(new_attr_val, portname,
1374                             MAXLINKATTRVALLEN) >= MAXLINKATTRVALLEN) {
1375                                 free(new_attr_val);
1376                                 return;
1377                         }
1378                         /* skip the '.' delimiter */
1379                         portidstr++;
1380                 }
1381                 new_attr_sz = strlen(new_attr_val) + 1;
1382                 upgraded = B_TRUE;
1383         }
1384 
1385         if (upgraded) {
1386                 attrp->lp_type = DLADM_TYPE_STR;
1387                 attrp->lp_sz = new_attr_sz;
1388                 free(attrp->lp_val);
1389                 attrp->lp_val = new_attr_val;
1390         }
1391 }
1392 
1393 static void
1394 dlmgmt_db_upgrade(dlmgmt_link_t *linkp)
1395 {
1396         dlmgmt_linkattr_t *attrp;
1397 
1398         for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next)
1399                 linkattr_upgrade(attrp);
1400 }
1401 
1402 static void
1403 dlmgmt_db_phys_activate(dlmgmt_link_t *linkp)
1404 {
1405         linkp->ll_flags |= DLMGMT_ACTIVE;
1406         (void) dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_ACTIVE);
1407 }
1408 
1409 static void
1410 dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
1411 {
1412         dlmgmt_link_t *linkp;
1413 
1414         for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1415             linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1416                 if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1417                         func(linkp);
1418         }
1419 }
1420 
1421 /*
1422  * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
1423  *
1424  * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
1425  * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
1426  * Because of the upcall architecture of dlmgmtd this can lead to deadlock
1427  * with the following scenario:
1428  *    a) the thread preparing to fork will have acquired the malloc locks
1429  *       then attempt to suspend every thread in preparation to fork.
1430  *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
1431  *       and get the credentials of their caller.
1432  *    c) we can't suspend the in-kernel thread making the upcall.
1433  *
1434  * Thus, we cannot serve door requests because we're blocked in malloc()
1435  * which fork() owns, but fork() is in turn blocked on the in-kernel thread
1436  * making the door upcall.  This is a fundamental architectural problem with
1437  * any server handling upcalls and also trying to fork().
1438  *
1439  * To minimize the chance of this deadlock occuring, we check ahead of time to
1440  * see if the file we want to read actually exists in the zone (which it almost
1441  * never does), so we don't need fork in that case (i.e. rarely to never).
1442  */
1443 static boolean_t
1444 zone_file_exists(char *zoneroot, char *filename)
1445 {
1446         struct stat     sb;
1447         char            fname[MAXPATHLEN];
1448 
1449         (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
1450 
1451         if (stat(fname, &sb) == -1)
1452                 return (B_FALSE);
1453 
1454         return (B_TRUE);
1455 }
1456 
1457 /*
1458  * Initialize the datalink <link name, linkid> mapping and the link's
1459  * attributes list based on the configuration file /etc/dladm/datalink.conf
1460  * and the active configuration cache file
1461  * /etc/svc/volatile/dladm/datalink-management:default.cache.
1462  */
1463 int
1464 dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1465 {
1466         dlmgmt_db_req_t *req;
1467         int             err;
1468         boolean_t       boot = B_FALSE;
1469         char            tdir[MAXPATHLEN];
1470         char            *path = cachefile;
1471 
1472         if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1473             DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1474                 return (err);
1475 
1476         /* Handle running in a non-native branded zone (i.e. has /native) */
1477         if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
1478                 (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
1479                 path = tdir;
1480         }
1481 
1482         if (zone_file_exists(zoneroot, path)) {
1483                 if ((err = dlmgmt_process_db_req(req)) != 0) {
1484                         /*
1485                          * If we get back ENOENT, that means that the active
1486                          * configuration file doesn't exist yet, and is not an
1487                          * error.  We'll create it down below after we've
1488                          * loaded the persistent configuration.
1489                          */
1490                         if (err != ENOENT)
1491                                 goto done;
1492                         boot = B_TRUE;
1493                 }
1494         } else {
1495                 boot = B_TRUE;
1496         }
1497 
1498         req->ls_flags = DLMGMT_PERSIST;
1499         err = dlmgmt_process_db_req(req);
1500         if (err != 0 && err != ENOENT)
1501                 goto done;
1502         err = 0;
1503         if (rewrite_needed) {
1504                 /*
1505                  * First update links in memory, then dump the entire db to
1506                  * disk.
1507                  */
1508                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade);
1509                 req->ls_op = DLMGMT_DB_OP_WRITE;
1510                 req->ls_linkid = DATALINK_ALL_LINKID;
1511                 if ((err = dlmgmt_process_db_req(req)) != 0 &&
1512                     err != EINPROGRESS)
1513                         goto done;
1514         }
1515         if (boot) {
1516                 dlmgmt_db_walk(zoneid, DATALINK_CLASS_PHYS,
1517                     dlmgmt_db_phys_activate);
1518         }
1519 
1520 done:
1521         if (err == EINPROGRESS)
1522                 err = 0;
1523         else
1524                 free(req);
1525         return (err);
1526 }
1527 
1528 /*
1529  * Remove all links in the given zoneid.
1530  *
1531  * We do this work in two different passes. In the first pass, we remove any
1532  * entry that hasn't been loaned and mark every entry that has been loaned as
1533  * something that is going to be tombstomed. In the second pass, we drop the
1534  * table lock for every entry and remove the tombstombed entry for our zone.
1535  */
1536 void
1537 dlmgmt_db_fini(zoneid_t zoneid)
1538 {
1539         dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1540 
1541         while (linkp != NULL) {
1542                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1543                 if (linkp->ll_zoneid == zoneid) {
1544                         boolean_t onloan = linkp->ll_onloan;
1545 
1546                         /*
1547                          * Cleanup any VNICs that were loaned to the zone
1548                          * before the zone goes away and we can no longer
1549                          * refer to the VNIC by the name/zoneid.
1550                          */
1551                         if (onloan) {
1552                                 (void) dlmgmt_delete_db_entry(linkp,
1553                                     DLMGMT_ACTIVE);
1554                                 linkp->ll_tomb = B_TRUE;
1555                         } else {
1556                                 (void) dlmgmt_destroy_common(linkp,
1557                                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1558                         }
1559                 }
1560                 linkp = next_linkp;
1561         }
1562 
1563 again:
1564         linkp = avl_first(&dlmgmt_name_avl);
1565         while (linkp != NULL) {
1566                 vnic_ioc_delete_t ioc;
1567 
1568                 next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1569 
1570                 if (linkp->ll_zoneid != zoneid) {
1571                         linkp = next_linkp;
1572                         continue;
1573                 }
1574                 ioc.vd_vnic_id = linkp->ll_linkid;
1575                 if (linkp->ll_tomb != B_TRUE)
1576                         abort();
1577 
1578                 /*
1579                  * We have to drop the table lock while going up into the
1580                  * kernel. If we hold the table lock while deleting a vnic, we
1581                  * may get blocked on the mac perimeter and the holder of it may
1582                  * want something from dlmgmtd.
1583                  */
1584                 dlmgmt_table_unlock();
1585 
1586                 if (ioctl(dladm_dld_fd(dld_handle),
1587                     VNIC_IOC_DELETE, &ioc) < 0)
1588                         dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
1589                             "delete VNIC ioctl failed %d %d",
1590                             ioc.vd_vnic_id, errno);
1591 
1592                 /*
1593                  * Even though we've dropped the lock, we know that nothing else
1594                  * could have removed us. Therefore, it should be safe to go
1595                  * through and delete ourselves, but do nothing else. We'll have
1596                  * to restart iteration from the beginning. This can be painful.
1597                  */
1598                 dlmgmt_table_lock(B_TRUE);
1599 
1600                 (void) dlmgmt_destroy_common(linkp,
1601                     DLMGMT_ACTIVE | DLMGMT_PERSIST);
1602                 goto again;
1603         }
1604 
1605 }