1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright 2016 Toomas Soome <tsoome@me.com>
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/errno.h>
  29 #include <sys/stropts.h>
  30 #include <sys/debug.h>
  31 #include <sys/ddi.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/vmem.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/callb.h>
  36 #include <sys/sysevent.h>
  37 #include <sys/sysevent_impl.h>
  38 #include <sys/sysevent/dev.h>
  39 #include <sys/modctl.h>
  40 #include <sys/lofi_impl.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/disp.h>
  43 #include <sys/autoconf.h>
  44 #include <sys/atomic.h>
  45 #include <sys/sdt.h>
  46 
  47 /* for doors */
  48 #include <sys/pathname.h>
  49 #include <sys/door.h>
  50 #include <sys/kmem.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/fs/snode.h>
  53 
  54 /*
  55  * log_sysevent.c - Provides the interfaces for kernel event publication
  56  *                      to the sysevent event daemon (syseventd).
  57  */
  58 
  59 /*
  60  * Debug stuff
  61  */
  62 static int log_event_debug = 0;
  63 #define LOG_DEBUG(args)  if (log_event_debug) cmn_err args
  64 #ifdef DEBUG
  65 #define LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
  66 #else
  67 #define LOG_DEBUG1(args)
  68 #endif
  69 
/*
 * Local static vars
 */
/* queue of event buffers sent to syseventd */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers on the queue awaiting delivery (the list headed
 * by log_eventq_head).  It is decremented as the delivery thread moves a
 * buffer onto the sent queue.
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
/*
 * NOTE(review): presumably the source of event sequence numbers; its use
 * is not visible in this part of the file -- confirm.
 */
static uint64_t kernel_event_id = 0;
/* nvlist encoding used when sizing and packing event attribute lists */
static int encoding = NV_ENCODE_NATIVE;

/* log event delivery flag */
#define LOGEVENT_DELIVERY_OK    0       /* OK to deliver event buffers */
#define LOGEVENT_DELIVERY_CONT  1       /* Continue to deliver event buffers */
#define LOGEVENT_DELIVERY_HOLD  2       /* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
volatile int logevent_max_q_sz = 5000;


/*
 * Current delivery state (one of LOGEVENT_DELIVERY_*).  Starts out on hold;
 * the delivery thread performs no upcalls while it is LOGEVENT_DELIVERY_HOLD.
 */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
static char logevent_door_upcall_filename[MAXPATHLEN];

static door_handle_t event_door = NULL;         /* Door for upcalls */
static kmutex_t event_door_mutex;               /* To protect event_door */

/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *                      userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *                      userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * event_qfull_mutex, event_qfull_cv - the delivery thread signals
 *                      event_qfull_cv when event_qfull_blocked is non-zero
 *                      and the queue has dropped below logevent_max_q_sz,
 *                      and broadcasts it when an upcall fails.
 *
 * event_qfull_blocked - non-zero while there is someone to wake on
 *                      event_qfull_cv (presumably a count of publishers
 *                      blocked on a full queue -- confirm against the
 *                      code that increments it).
 *
 * sysevent_upcall_status - status of the door upcall link; starts at -1,
 *                      is set to 0 while delivery works (or is merely on
 *                      hold) and to the upcall error otherwise.
 *
 * registered_channel_mutex - the single lock for the channel registration
 *                      database described later in this file.
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define LOG_EVENT_MIN_PAUSE     8
#define LOG_EVENT_MAX_PAUSE     128

/*
 * State for log_event_pause(): event_pause_state is 1 while a pause is in
 * progress and is cleared by the timeout callback, which also signals
 * event_pause_cv under event_pause_mutex.
 */
static kmutex_t event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;

/* Cached device links for lofi. */
lofi_nvl_t lofi_devlink_cache;
 155 
/*
 * log_event_busy_timeout - timeout callback scheduled by log_event_pause().
 *			Clears event_pause_state while holding
 *			event_pause_mutex and signals event_pause_cv so the
 *			thread waiting in log_event_pause() can resume.
 *			The callback argument is not used.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
 165 
 166 static void
 167 log_event_pause(int nticks)
 168 {
 169         timeout_id_t id;
 170 
 171         /*
 172          * Only one use of log_event_pause at a time
 173          */
 174         ASSERT(event_pause_state == 0);
 175 
 176         event_pause_state = 1;
 177         id = timeout(log_event_busy_timeout, NULL, nticks);
 178         if (id != 0) {
 179                 mutex_enter(&event_pause_mutex);
 180                 while (event_pause_state)
 181                         cv_wait(&event_pause_cv, &event_pause_mutex);
 182                 mutex_exit(&event_pause_mutex);
 183         }
 184         event_pause_state = 0;
 185 }
 186 
 187 
 188 /*
 189  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
 190  *                      Check for rebinding errors
 191  *                      This buffer is reused to by the syseventd door_return
 192  *                      to hold the result code
 193  */
 194 static int
 195 log_event_upcall(log_event_upcall_arg_t *arg)
 196 {
 197         int error;
 198         size_t size;
 199         sysevent_t *ev;
 200         door_arg_t darg, save_arg;
 201         int retry;
 202         int neagain = 0;
 203         int neintr = 0;
 204         int nticks = LOG_EVENT_MIN_PAUSE;
 205 
 206         /* Initialize door args */
 207         ev = (sysevent_t *)&arg->buf;
 208         size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
 209 
 210         darg.rbuf = (char *)arg;
 211         darg.data_ptr = (char *)arg;
 212         darg.rsize = size;
 213         darg.data_size = size;
 214         darg.desc_ptr = NULL;
 215         darg.desc_num = 0;
 216 
 217         LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
 218             (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
 219 
 220         save_arg = darg;
 221         for (retry = 0; ; retry++) {
 222 
 223                 mutex_enter(&event_door_mutex);
 224                 if (event_door == NULL) {
 225                         mutex_exit(&event_door_mutex);
 226 
 227                         return (EBADF);
 228                 }
 229 
 230                 if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
 231                     SIZE_MAX, 0)) == 0) {
 232                         mutex_exit(&event_door_mutex);
 233                         break;
 234                 }
 235 
 236                 /*
 237                  * EBADF is handled outside the switch below because we need to
 238                  * hold event_door_mutex a bit longer
 239                  */
 240                 if (error == EBADF) {
 241                         /* Server died */
 242                         door_ki_rele(event_door);
 243                         event_door = NULL;
 244 
 245                         mutex_exit(&event_door_mutex);
 246                         return (error);
 247                 }
 248 
 249                 mutex_exit(&event_door_mutex);
 250 
 251                 /*
 252                  * The EBADF case is already handled above with event_door_mutex
 253                  * held
 254                  */
 255                 switch (error) {
 256                 case EINTR:
 257                         neintr++;
 258                         log_event_pause(2);
 259                         darg = save_arg;
 260                         break;
 261                 case EAGAIN:
 262                         /* cannot deliver upcall - process may be forking */
 263                         neagain++;
 264                         log_event_pause(nticks);
 265                         nticks <<= 1;
 266                         if (nticks > LOG_EVENT_MAX_PAUSE)
 267                                 nticks = LOG_EVENT_MAX_PAUSE;
 268                         darg = save_arg;
 269                         break;
 270                 default:
 271                         cmn_err(CE_CONT,
 272                             "log_event_upcall: door_ki_upcall error %d\n",
 273                             error);
 274                         return (error);
 275                 }
 276         }
 277 
 278         if (neagain > 0 || neintr > 0) {
 279                 LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
 280                     neagain, neintr, nticks));
 281         }
 282 
 283         LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
 284             "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
 285             error, (void *)arg, (void *)darg.rbuf,
 286             (void *)darg.data_ptr,
 287             *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
 288 
 289         if (!error) {
 290                 /*
 291                  * upcall was successfully executed. Check return code.
 292                  */
 293                 error = *((int *)(darg.rbuf));
 294         }
 295 
 296         return (error);
 297 }
 298 
 299 /*
 300  * log_event_deliver - event delivery thread
 301  *                      Deliver all events on the event queue to syseventd.
 302  *                      If the daemon can not process events, stop event
 303  *                      delivery and wait for an indication from the
 304  *                      daemon to resume delivery.
 305  *
 306  *                      Once all event buffers have been delivered, wait
 307  *                      until there are more to deliver.
 308  */
 309 static void
 310 log_event_deliver()
 311 {
 312         log_eventq_t *q;
 313         int upcall_err;
 314         callb_cpr_t cprinfo;
 315 
 316         CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
 317             "logevent");
 318 
 319         /*
 320          * eventq_head_mutex is exited (released) when there are no more
 321          * events to process from the eventq in cv_wait().
 322          */
 323         mutex_enter(&eventq_head_mutex);
 324 
 325         for (;;) {
 326                 LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
 327                     (void *)log_eventq_head));
 328 
 329                 upcall_err = 0;
 330                 q = log_eventq_head;
 331 
 332                 while (q) {
 333                         if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
 334                                 upcall_err = EAGAIN;
 335                                 break;
 336                         }
 337 
 338                         log_event_delivery = LOGEVENT_DELIVERY_OK;
 339 
 340                         /*
 341                          * Release event queue lock during upcall to
 342                          * syseventd
 343                          */
 344                         mutex_exit(&eventq_head_mutex);
 345                         if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
 346                                 mutex_enter(&eventq_head_mutex);
 347                                 break;
 348                         }
 349 
 350                         /*
 351                          * We may be able to add entries to
 352                          * the queue now.
 353                          */
 354                         if (event_qfull_blocked > 0 &&
 355                             log_eventq_cnt < logevent_max_q_sz) {
 356                                 mutex_enter(&event_qfull_mutex);
 357                                 if (event_qfull_blocked > 0) {
 358                                         cv_signal(&event_qfull_cv);
 359                                 }
 360                                 mutex_exit(&event_qfull_mutex);
 361                         }
 362 
 363                         mutex_enter(&eventq_head_mutex);
 364 
 365                         /*
 366                          * Daemon restart can cause entries to be moved from
 367                          * the sent queue and put back on the event queue.
 368                          * If this has occurred, replay event queue
 369                          * processing from the new queue head.
 370                          */
 371                         if (q != log_eventq_head) {
 372                                 q = log_eventq_head;
 373                                 LOG_DEBUG((CE_CONT, "log_event_deliver: "
 374                                     "door upcall/daemon restart race\n"));
 375                         } else {
 376                                 log_eventq_t *next;
 377 
 378                                 /*
 379                                  * Move the event to the sent queue when a
 380                                  * successful delivery has been made.
 381                                  */
 382                                 mutex_enter(&eventq_sent_mutex);
 383                                 next = q->next;
 384                                 q->next = log_eventq_sent;
 385                                 log_eventq_sent = q;
 386                                 q = next;
 387                                 log_eventq_head = q;
 388                                 log_eventq_cnt--;
 389                                 if (q == NULL) {
 390                                         ASSERT(log_eventq_cnt == 0);
 391                                         log_eventq_tail = NULL;
 392                                 }
 393                                 mutex_exit(&eventq_sent_mutex);
 394                         }
 395                 }
 396 
 397                 switch (upcall_err) {
 398                 case 0:
 399                         /*
 400                          * Success. The queue is empty.
 401                          */
 402                         sysevent_upcall_status = 0;
 403                         break;
 404                 case EAGAIN:
 405                         /*
 406                          * Delivery is on hold (but functional).
 407                          */
 408                         sysevent_upcall_status = 0;
 409                         /*
 410                          * If the user has already signaled for delivery
 411                          * resumption, continue.  Otherwise, we wait until
 412                          * we are signaled to continue.
 413                          */
 414                         if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
 415                                 continue;
 416                         log_event_delivery = LOGEVENT_DELIVERY_HOLD;
 417 
 418                         LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
 419                         break;
 420                 default:
 421                         LOG_DEBUG((CE_CONT, "log_event_deliver: "
 422                             "upcall err %d\n", upcall_err));
 423                         sysevent_upcall_status = upcall_err;
 424                         /*
 425                          * Signal everyone waiting that transport is down
 426                          */
 427                         if (event_qfull_blocked > 0) {
 428                                 mutex_enter(&event_qfull_mutex);
 429                                 if (event_qfull_blocked > 0) {
 430                                         cv_broadcast(&event_qfull_cv);
 431                                 }
 432                                 mutex_exit(&event_qfull_mutex);
 433                         }
 434                         break;
 435                 }
 436 
 437                 CALLB_CPR_SAFE_BEGIN(&cprinfo);
 438                 cv_wait(&log_event_cv, &eventq_head_mutex);
 439                 CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
 440         }
 441         /* NOTREACHED */
 442 }
 443 
/*
 * Set up the nvlist based data cache. Used by lofi to find
 * device name for mapped file.
 *
 * Initializes the cache's lock and condition variable and allocates the
 * backing nvlist with unique names.  The allocation uses KM_SLEEP and its
 * return value is deliberately ignored.
 */
static void
lofi_nvl_init(lofi_nvl_t *cache)
{
	mutex_init(&cache->ln_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cache->ln_cv, NULL, CV_DRIVER, NULL);
	(void) nvlist_alloc(&cache->ln_data, NV_UNIQUE_NAME, KM_SLEEP);
}
 455 
 456 /*
 457  * log_event_init - Allocate and initialize log_event data structures.
 458  */
 459 void
 460 log_event_init()
 461 {
 462         /* Set up devlink cache for lofi. */
 463         lofi_nvl_init(&lofi_devlink_cache);
 464 
 465         mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);
 466 
 467         mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
 468         mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
 469         cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
 470 
 471         mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
 472         cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
 473 
 474         mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
 475         cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
 476 
 477         mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
 478         sysevent_evc_init();
 479 }
 480 
 481 /*
 482  * The following routines are used by kernel event publishers to
 483  * allocate, append and free event buffers
 484  */
 485 /*
 486  * sysevent_alloc - Allocate new eventq struct.  This element contains
 487  *                      an event buffer that will be used in a subsequent
 488  *                      call to log_sysevent.
 489  */
 490 sysevent_t *
 491 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
 492 {
 493         int payload_sz;
 494         int class_sz, subclass_sz, pub_sz;
 495         int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
 496         sysevent_t *ev;
 497         log_eventq_t *q;
 498 
 499         ASSERT(class != NULL);
 500         ASSERT(subclass != NULL);
 501         ASSERT(pub != NULL);
 502 
 503         /*
 504          * Calculate and reserve space for the class, subclass and
 505          * publisher strings in the event buffer
 506          */
 507         class_sz = strlen(class) + 1;
 508         subclass_sz = strlen(subclass) + 1;
 509         pub_sz = strlen(pub) + 1;
 510 
 511         ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
 512             <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
 513 
 514         /* String sizes must be 64-bit aligned in the event buffer */
 515         aligned_class_sz = SE_ALIGN(class_sz);
 516         aligned_subclass_sz = SE_ALIGN(subclass_sz);
 517         aligned_pub_sz = SE_ALIGN(pub_sz);
 518 
 519         payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
 520             (aligned_subclass_sz - sizeof (uint64_t)) +
 521             (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
 522 
 523         /*
 524          * Allocate event buffer plus additional sysevent queue
 525          * and payload overhead.
 526          */
 527         q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
 528         if (q == NULL) {
 529                 return (NULL);
 530         }
 531 
 532         /* Initialize the event buffer data */
 533         ev = (sysevent_t *)&q->arg.buf;
 534         SE_VERSION(ev) = SYS_EVENT_VERSION;
 535         bcopy(class, SE_CLASS_NAME(ev), class_sz);
 536 
 537         SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
 538                 + aligned_class_sz;
 539         bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
 540 
 541         SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
 542         bcopy(pub, SE_PUB_NAME(ev), pub_sz);
 543 
 544         SE_ATTR_PTR(ev) = UINT64_C(0);
 545         SE_PAYLOAD_SZ(ev) = payload_sz;
 546 
 547         return (ev);
 548 }
 549 
 550 /*
 551  * sysevent_free - Free event buffer and any attribute data.
 552  */
 553 void
 554 sysevent_free(sysevent_t *ev)
 555 {
 556         log_eventq_t *q;
 557         nvlist_t *nvl;
 558 
 559         ASSERT(ev != NULL);
 560         q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
 561         nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
 562 
 563         if (nvl != NULL) {
 564                 size_t size = 0;
 565                 (void) nvlist_size(nvl, &size, encoding);
 566                 SE_PAYLOAD_SZ(ev) -= size;
 567                 nvlist_free(nvl);
 568         }
 569         kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
 570 }
 571 
 572 /*
 573  * free_packed_event - Free packed event buffer
 574  */
 575 static void
 576 free_packed_event(sysevent_t *ev)
 577 {
 578         log_eventq_t *q;
 579 
 580         ASSERT(ev != NULL);
 581         q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
 582 
 583         kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
 584 }
 585 
 586 /*
 587  * sysevent_add_attr - Add new attribute element to an event attribute list
 588  *                      If attribute list is NULL, start a new list.
 589  */
 590 int
 591 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
 592     sysevent_value_t *se_value, int flag)
 593 {
 594         int error;
 595         nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
 596 
 597         if (nvlp == NULL || se_value == NULL) {
 598                 return (SE_EINVAL);
 599         }
 600 
 601         /*
 602          * attr_sz is composed of the value data size + the name data size +
 603          * any header data.  64-bit aligned.
 604          */
 605         if (strlen(name) >= MAX_ATTR_NAME) {
 606                 return (SE_EINVAL);
 607         }
 608 
 609         /*
 610          * Allocate nvlist
 611          */
 612         if ((*nvlp == NULL) &&
 613             (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
 614                 return (SE_ENOMEM);
 615 
 616         /* add the attribute */
 617         switch (se_value->value_type) {
 618         case SE_DATA_TYPE_BYTE:
 619                 error = nvlist_add_byte(*ev_attr_list, name,
 620                     se_value->value.sv_byte);
 621                 break;
 622         case SE_DATA_TYPE_INT16:
 623                 error = nvlist_add_int16(*ev_attr_list, name,
 624                     se_value->value.sv_int16);
 625                 break;
 626         case SE_DATA_TYPE_UINT16:
 627                 error = nvlist_add_uint16(*ev_attr_list, name,
 628                     se_value->value.sv_uint16);
 629                 break;
 630         case SE_DATA_TYPE_INT32:
 631                 error = nvlist_add_int32(*ev_attr_list, name,
 632                     se_value->value.sv_int32);
 633                 break;
 634         case SE_DATA_TYPE_UINT32:
 635                 error = nvlist_add_uint32(*ev_attr_list, name,
 636                     se_value->value.sv_uint32);
 637                 break;
 638         case SE_DATA_TYPE_INT64:
 639                 error = nvlist_add_int64(*ev_attr_list, name,
 640                     se_value->value.sv_int64);
 641                 break;
 642         case SE_DATA_TYPE_UINT64:
 643                 error = nvlist_add_uint64(*ev_attr_list, name,
 644                     se_value->value.sv_uint64);
 645                 break;
 646         case SE_DATA_TYPE_STRING:
 647                 if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
 648                         return (SE_EINVAL);
 649                 error = nvlist_add_string(*ev_attr_list, name,
 650                     se_value->value.sv_string);
 651                 break;
 652         case SE_DATA_TYPE_BYTES:
 653                 if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
 654                         return (SE_EINVAL);
 655                 error = nvlist_add_byte_array(*ev_attr_list, name,
 656                     se_value->value.sv_bytes.data,
 657                     se_value->value.sv_bytes.size);
 658                 break;
 659         case SE_DATA_TYPE_TIME:
 660                 error = nvlist_add_hrtime(*ev_attr_list, name,
 661                     se_value->value.sv_time);
 662                 break;
 663         default:
 664                 return (SE_EINVAL);
 665         }
 666 
 667         return (error ? SE_ENOMEM : 0);
 668 }
 669 
 670 /*
 671  * sysevent_free_attr - Free an attribute list not associated with an
 672  *                      event buffer.
 673  */
 674 void
 675 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
 676 {
 677         nvlist_free((nvlist_t *)ev_attr_list);
 678 }
 679 
 680 /*
 681  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
 682  *
 683  *      This data will be re-packed into contiguous memory when the event
 684  *      buffer is posted to log_sysevent.
 685  */
 686 int
 687 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
 688 {
 689         size_t size = 0;
 690 
 691         if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
 692                 return (SE_EINVAL);
 693         }
 694 
 695         SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
 696         (void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
 697         SE_PAYLOAD_SZ(ev) += size;
 698         SE_FLAG(ev) = 0;
 699 
 700         return (0);
 701 }
 702 
 703 /*
 704  * sysevent_detach_attributes - Detach but don't free attribute list from the
 705  *                              event buffer.
 706  */
 707 void
 708 sysevent_detach_attributes(sysevent_t *ev)
 709 {
 710         size_t size = 0;
 711         nvlist_t *nvl;
 712 
 713         if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
 714                 return;
 715         }
 716 
 717         SE_ATTR_PTR(ev) = UINT64_C(0);
 718         (void) nvlist_size(nvl, &size, encoding);
 719         SE_PAYLOAD_SZ(ev) -= size;
 720         ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
 721 }
 722 
 723 /*
 724  * sysevent_attr_name - Get name of attribute
 725  */
 726 char *
 727 sysevent_attr_name(sysevent_attr_t *attr)
 728 {
 729         if (attr == NULL) {
 730                 return (NULL);
 731         }
 732 
 733         return (nvpair_name(attr));
 734 }
 735 
 736 /*
 737  * sysevent_attr_type - Get type of attribute
 738  */
 739 int
 740 sysevent_attr_type(sysevent_attr_t *attr)
 741 {
 742         /*
 743          * The SE_DATA_TYPE_* are typedef'ed to be the
 744          * same value as DATA_TYPE_*
 745          */
 746         return (nvpair_type((nvpair_t *)attr));
 747 }
 748 
 749 /*
 750  * Repack event buffer into contiguous memory
 751  */
 752 static sysevent_t *
 753 se_repack(sysevent_t *ev, int flag)
 754 {
 755         size_t copy_len;
 756         caddr_t attr;
 757         size_t size;
 758         uint64_t attr_offset;
 759         sysevent_t *copy;
 760         log_eventq_t *qcopy;
 761         sysevent_attr_list_t *nvl;
 762 
 763         copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
 764         qcopy = kmem_zalloc(copy_len, flag);
 765         if (qcopy == NULL) {
 766                 return (NULL);
 767         }
 768         copy = (sysevent_t *)&qcopy->arg.buf;
 769 
 770         /*
 771          * Copy event header, class, subclass and publisher names
 772          * Set the attribute offset (in number of bytes) to contiguous
 773          * memory after the header.
 774          */
 775 
 776         attr_offset = SE_ATTR_OFF(ev);
 777 
 778         ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
 779 
 780         bcopy(ev, copy, attr_offset);
 781 
 782         /* Check if attribute list exists */
 783         if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
 784                 return (copy);
 785         }
 786 
 787         /*
 788          * Copy attribute data to contiguous memory
 789          */
 790         attr = (char *)copy + attr_offset;
 791         (void) nvlist_size(nvl, &size, encoding);
 792         if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
 793                 kmem_free(qcopy, copy_len);
 794                 return (NULL);
 795         }
 796         SE_ATTR_PTR(copy) = UINT64_C(0);
 797         SE_FLAG(copy) = SE_PACKED_BUF;
 798 
 799         return (copy);
 800 }
 801 
 802 /*
 803  * The sysevent registration provides a persistent and reliable database
 804  * for channel information for sysevent channel publishers and
 805  * subscribers.
 806  *
 807  * A channel is created and maintained by the kernel upon the first
 808  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
 809  * event subscription information is updated as publishers or subscribers
 810  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
 811  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
 812  *
 813  * For consistency, id's are assigned for every publisher or subscriber
 814  * bound to a particular channel.  The id's are used to constrain resources
 815  * and perform subscription lookup.
 816  *
 817  * Associated with each channel is a hashed list of the current subscriptions
 818  * based upon event class and subclasses.  A subscription contains a class name,
 819  * list of possible subclasses and an array of subscriber ids.  Subscriptions
 820  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
 821  *
 822  * Channels are closed once the last subscriber or publisher performs a
 823  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
 824  * channel are freed upon last close.
 825  *
 826  * Locking:
 827  *      Every operation to log_sysevent() is protected by a single lock,
 828  *      registered_channel_mutex.  It is expected that the granularity of
 829  *      a single lock is sufficient given the frequency that updates will
 830  *      occur.
 831  *
 832  *      If this locking strategy proves to be too contentious, a per-hash
 833  *      or per-channel locking strategy may be implemented.
 834  */
 835 
 836 
/* Map a channel name to its bucket index in registered_channels[] */
#define CHANN_HASH(channel_name)        (hash_func(channel_name) \
                                        % CHAN_HASH_SZ)

/*
 * Hash table of channel descriptors, indexed by CHANN_HASH(); descriptors
 * that share a bucket are chained through scd_next.
 */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* NOTE(review): presumably the number of open channels -- confirm at use */
static int channel_cnt;
static void remove_all_class(sysevent_channel_descriptor_t *chan,
        uint32_t sub_id);
 844 
 845 static uint32_t
 846 hash_func(const char *s)
 847 {
 848         uint32_t result = 0;
 849         uint_t g;
 850 
 851         while (*s != '\0') {
 852                 result <<= 4;
 853                 result += (uint32_t)*s++;
 854                 g = result & 0xf0000000;
 855                 if (g != 0) {
 856                         result ^= g >> 24;
 857                         result ^= g;
 858                 }
 859         }
 860 
 861         return (result);
 862 }
 863 
 864 static sysevent_channel_descriptor_t *
 865 get_channel(char *channel_name)
 866 {
 867         int hash_index;
 868         sysevent_channel_descriptor_t *chan_list;
 869 
 870         if (channel_name == NULL)
 871                 return (NULL);
 872 
 873         /* Find channel descriptor */
 874         hash_index = CHANN_HASH(channel_name);
 875         chan_list = registered_channels[hash_index];
 876         while (chan_list != NULL) {
 877                 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
 878                         break;
 879                 } else {
 880                         chan_list = chan_list->scd_next;
 881                 }
 882         }
 883 
 884         return (chan_list);
 885 }
 886 
 887 static class_lst_t *
 888 create_channel_registration(sysevent_channel_descriptor_t *chan,
 889     char *event_class, int index)
 890 {
 891         size_t class_len;
 892         class_lst_t *c_list;
 893 
 894         class_len = strlen(event_class) + 1;
 895         c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
 896         c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
 897         bcopy(event_class, c_list->cl_name, class_len);
 898 
 899         c_list->cl_subclass_list =
 900             kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
 901         c_list->cl_subclass_list->sl_name =
 902             kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
 903         bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
 904             sizeof (EC_SUB_ALL));
 905 
 906         c_list->cl_next = chan->scd_class_list_tbl[index];
 907         chan->scd_class_list_tbl[index] = c_list;
 908 
 909         return (c_list);
 910 }
 911 
 912 static void
 913 free_channel_registration(sysevent_channel_descriptor_t *chan)
 914 {
 915         int i;
 916         class_lst_t *clist, *next_clist;
 917         subclass_lst_t *sclist, *next_sc;
 918 
 919         for (i = 0; i <= CLASS_HASH_SZ; ++i) {
 920 
 921                 clist = chan->scd_class_list_tbl[i];
 922                 while (clist != NULL) {
 923                         sclist = clist->cl_subclass_list;
 924                         while (sclist != NULL) {
 925                                 kmem_free(sclist->sl_name,
 926                                     strlen(sclist->sl_name) + 1);
 927                                 next_sc = sclist->sl_next;
 928                                 kmem_free(sclist, sizeof (subclass_lst_t));
 929                                 sclist = next_sc;
 930                         }
 931                         kmem_free(clist->cl_name,
 932                             strlen(clist->cl_name) + 1);
 933                         next_clist = clist->cl_next;
 934                         kmem_free(clist, sizeof (class_lst_t));
 935                         clist = next_clist;
 936                 }
 937         }
 938         chan->scd_class_list_tbl[0] = NULL;
 939 }
 940 
 941 static int
 942 open_channel(char *channel_name)
 943 {
 944         int hash_index;
 945         sysevent_channel_descriptor_t *chan, *chan_list;
 946 
 947 
 948         if (channel_cnt > MAX_CHAN) {
 949                 return (-1);
 950         }
 951 
 952         /* Find channel descriptor */
 953         hash_index = CHANN_HASH(channel_name);
 954         chan_list = registered_channels[hash_index];
 955         while (chan_list != NULL) {
 956                 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
 957                         chan_list->scd_ref_cnt++;
 958                         kmem_free(channel_name, strlen(channel_name) + 1);
 959                         return (0);
 960                 } else {
 961                         chan_list = chan_list->scd_next;
 962                 }
 963         }
 964 
 965 
 966         /* New channel descriptor */
 967         chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
 968         chan->scd_channel_name = channel_name;
 969 
 970         /*
 971          * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
 972          * Subscriber id 0 is never allocated, but is used as a reserved id
 973          * by libsysevent
 974          */
 975         if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
 976             MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
 977             VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
 978                 kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
 979                 return (-1);
 980         }
 981         if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
 982             MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
 983             VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
 984                 vmem_destroy(chan->scd_subscriber_cache);
 985                 kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
 986                 return (-1);
 987         }
 988 
 989         chan->scd_ref_cnt = 1;
 990 
 991         (void) create_channel_registration(chan, EC_ALL, 0);
 992 
 993         if (registered_channels[hash_index] != NULL)
 994                 chan->scd_next = registered_channels[hash_index];
 995 
 996         registered_channels[hash_index] = chan;
 997 
 998         ++channel_cnt;
 999 
1000         return (0);
1001 }
1002 
1003 static void
1004 close_channel(char *channel_name)
1005 {
1006         int hash_index;
1007         sysevent_channel_descriptor_t *chan, *prev_chan;
1008 
1009         /* Find channel descriptor */
1010         hash_index = CHANN_HASH(channel_name);
1011         prev_chan = chan = registered_channels[hash_index];
1012 
1013         while (chan != NULL) {
1014                 if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1015                         break;
1016                 } else {
1017                         prev_chan = chan;
1018                         chan = chan->scd_next;
1019                 }
1020         }
1021 
1022         if (chan == NULL)
1023                 return;
1024 
1025         chan->scd_ref_cnt--;
1026         if (chan->scd_ref_cnt > 0)
1027                 return;
1028 
1029         free_channel_registration(chan);
1030         vmem_destroy(chan->scd_subscriber_cache);
1031         vmem_destroy(chan->scd_publisher_cache);
1032         kmem_free(chan->scd_channel_name,
1033             strlen(chan->scd_channel_name) + 1);
1034         if (registered_channels[hash_index] == chan)
1035                 registered_channels[hash_index] = chan->scd_next;
1036         else
1037                 prev_chan->scd_next = chan->scd_next;
1038         kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1039         --channel_cnt;
1040 }
1041 
1042 static id_t
1043 bind_common(sysevent_channel_descriptor_t *chan, int type)
1044 {
1045         id_t id;
1046 
1047         if (type == SUBSCRIBER) {
1048                 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1049                     VM_NOSLEEP | VM_NEXTFIT);
1050                 if (id <= 0 || id > MAX_SUBSCRIBERS)
1051                         return (0);
1052                 chan->scd_subscriber_ids[id] = 1;
1053         } else {
1054                 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1055                     VM_NOSLEEP | VM_NEXTFIT);
1056                 if (id <= 0 || id > MAX_PUBLISHERS)
1057                         return (0);
1058                 chan->scd_publisher_ids[id] = 1;
1059         }
1060 
1061         return (id);
1062 }
1063 
1064 static int
1065 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1066 {
1067         if (type == SUBSCRIBER) {
1068                 if (id <= 0 || id > MAX_SUBSCRIBERS)
1069                         return (0);
1070                 if (chan->scd_subscriber_ids[id] == 0)
1071                         return (0);
1072                 (void) remove_all_class(chan, id);
1073                 chan->scd_subscriber_ids[id] = 0;
1074                 vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1075         } else {
1076                 if (id <= 0 || id > MAX_PUBLISHERS)
1077                         return (0);
1078                 if (chan->scd_publisher_ids[id] == 0)
1079                         return (0);
1080                 chan->scd_publisher_ids[id] = 0;
1081                 vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1082         }
1083 
1084         return (1);
1085 }
1086 
1087 static void
1088 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
1089 {
1090         if (unbind_common(chan, type, id))
1091                 close_channel(chan->scd_channel_name);
1092 }
1093 
1094 static subclass_lst_t *
1095 find_subclass(class_lst_t *c_list, char *subclass)
1096 {
1097         subclass_lst_t *sc_list;
1098 
1099         if (c_list == NULL)
1100                 return (NULL);
1101 
1102         sc_list = c_list->cl_subclass_list;
1103 
1104         while (sc_list != NULL) {
1105                 if (strcmp(sc_list->sl_name, subclass) == 0) {
1106                         return (sc_list);
1107                 }
1108                 sc_list = sc_list->sl_next;
1109         }
1110 
1111         return (NULL);
1112 }
1113 
1114 static void
1115 insert_subclass(class_lst_t *c_list, char **subclass_names,
1116     int subclass_num, uint32_t sub_id)
1117 {
1118         int i, subclass_sz;
1119         subclass_lst_t *sc_list;
1120 
1121         for (i = 0; i < subclass_num; ++i) {
1122                 if ((sc_list = find_subclass(c_list, subclass_names[i]))
1123                     != NULL) {
1124                         sc_list->sl_num[sub_id] = 1;
1125                 } else {
1126 
1127                         sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1128                             KM_SLEEP);
1129                         subclass_sz = strlen(subclass_names[i]) + 1;
1130                         sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1131                         bcopy(subclass_names[i], sc_list->sl_name,
1132                             subclass_sz);
1133 
1134                         sc_list->sl_num[sub_id] = 1;
1135 
1136                         sc_list->sl_next = c_list->cl_subclass_list;
1137                         c_list->cl_subclass_list = sc_list;
1138                 }
1139         }
1140 }
1141 
1142 static class_lst_t *
1143 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1144 {
1145         class_lst_t *c_list;
1146 
1147         c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1148         while (c_list != NULL) {
1149                 if (strcmp(class_name, c_list->cl_name) == 0)
1150                         break;
1151                 c_list = c_list->cl_next;
1152         }
1153 
1154         return (c_list);
1155 }
1156 
1157 static void
1158 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1159 {
1160         int i;
1161         class_lst_t *c_list;
1162         subclass_lst_t *sc_list;
1163 
1164         for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1165 
1166                 c_list = chan->scd_class_list_tbl[i];
1167                 while (c_list != NULL) {
1168                         sc_list = c_list->cl_subclass_list;
1169                         while (sc_list != NULL) {
1170                                 sc_list->sl_num[sub_id] = 0;
1171                                 sc_list = sc_list->sl_next;
1172                         }
1173                         c_list = c_list->cl_next;
1174                 }
1175         }
1176 }
1177 
1178 static void
1179 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1180     char *class_name)
1181 {
1182         class_lst_t *c_list;
1183         subclass_lst_t *sc_list;
1184 
1185         if (strcmp(class_name, EC_ALL) == 0) {
1186                 remove_all_class(chan, sub_id);
1187                 return;
1188         }
1189 
1190         if ((c_list = find_class(chan, class_name)) == NULL) {
1191                 return;
1192         }
1193 
1194         sc_list = c_list->cl_subclass_list;
1195         while (sc_list != NULL) {
1196                 sc_list->sl_num[sub_id] = 0;
1197                 sc_list = sc_list->sl_next;
1198         }
1199 }
1200 
1201 static int
1202 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1203     char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1204 {
1205         class_lst_t *c_list;
1206 
1207         if (strcmp(event_class, EC_ALL) == 0) {
1208                 insert_subclass(chan->scd_class_list_tbl[0],
1209                     event_subclass_lst, 1, sub_id);
1210                 return (0);
1211         }
1212 
1213         if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1214                 return (-1);
1215 
1216         /* New class, add to the registration cache */
1217         if ((c_list = find_class(chan, event_class)) == NULL) {
1218                 c_list = create_channel_registration(chan, event_class,
1219                     CLASS_HASH(event_class));
1220         }
1221 
1222         /* Update the subclass list */
1223         insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1224 
1225         return (0);
1226 }
1227 
1228 static int
1229 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1230     char *nvlbuf, size_t nvlsize)
1231 {
1232         uint_t num_elem;
1233         char *event_class;
1234         char **event_list;
1235         nvlist_t *nvl;
1236         nvpair_t *nvpair = NULL;
1237 
1238         if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1239                 return (-1);
1240 
1241         if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1242                 nvlist_free(nvl);
1243                 return (-1);
1244         }
1245 
1246         if ((event_class = nvpair_name(nvpair)) == NULL) {
1247                 nvlist_free(nvl);
1248                 return (-1);
1249         }
1250         if (nvpair_value_string_array(nvpair, &event_list,
1251             &num_elem) != 0) {
1252                 nvlist_free(nvl);
1253                 return (-1);
1254         }
1255 
1256         if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1257                 nvlist_free(nvl);
1258                 return (-1);
1259         }
1260 
1261         nvlist_free(nvl);
1262 
1263         return (0);
1264 }
1265 
1266 /*
1267  * get_registration - Return the requested class hash chain
1268  */
1269 static int
1270 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1271     uint32_t *bufsz, uint32_t class_index)
1272 {
1273         int num_classes = 0;
1274         char *nvlbuf = NULL;
1275         size_t nvlsize;
1276         nvlist_t *nvl;
1277         class_lst_t *clist;
1278         subclass_lst_t *sc_list;
1279 
1280         if (class_index < 0 || class_index > CLASS_HASH_SZ)
1281                 return (EINVAL);
1282 
1283         if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1284                 return (ENOENT);
1285         }
1286 
1287         if (nvlist_alloc(&nvl, 0, 0) != 0) {
1288                 return (EFAULT);
1289         }
1290 
1291         while (clist != NULL) {
1292                 if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1293                     != 0) {
1294                         nvlist_free(nvl);
1295                         return (EFAULT);
1296                 }
1297 
1298                 sc_list = clist->cl_subclass_list;
1299                 while (sc_list != NULL) {
1300                         if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1301                             sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1302                                 nvlist_free(nvl);
1303                                 return (EFAULT);
1304                         }
1305                         sc_list = sc_list->sl_next;
1306                 }
1307                 num_classes++;
1308                 clist = clist->cl_next;
1309         }
1310 
1311         if (num_classes == 0) {
1312                 nvlist_free(nvl);
1313                 return (ENOENT);
1314         }
1315 
1316         if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1317             KM_SLEEP)
1318             != 0) {
1319                 nvlist_free(nvl);
1320                 return (EFAULT);
1321         }
1322 
1323         nvlist_free(nvl);
1324 
1325         if (nvlsize > *bufsz) {
1326                 kmem_free(nvlbuf, nvlsize);
1327                 *bufsz = nvlsize;
1328                 return (EAGAIN);
1329         }
1330 
1331         bcopy(nvlbuf, databuf, nvlsize);
1332         kmem_free(nvlbuf, nvlsize);
1333 
1334         return (0);
1335 }
1336 
1337 /*
1338  * log_sysevent_register - Register event subscriber for a particular
1339  *              event channel.
1340  */
1341 int
1342 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1343 {
1344         int error = 0;
1345         char *kchannel, *databuf = NULL;
1346         size_t bufsz;
1347         se_pubsub_t kdata;
1348         sysevent_channel_descriptor_t *chan;
1349 
1350         if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1351                 return (EFAULT);
1352         }
1353         if (kdata.ps_channel_name_len == 0) {
1354                 return (EINVAL);
1355         }
1356         kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1357         if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1358                 kmem_free(kchannel, kdata.ps_channel_name_len);
1359                 return (EFAULT);
1360         }
1361         bufsz = kdata.ps_buflen;
1362         if (bufsz > 0) {
1363                 databuf = kmem_alloc(bufsz, KM_SLEEP);
1364                 if (copyin(udatabuf, databuf, bufsz) == -1) {
1365                         kmem_free(kchannel, kdata.ps_channel_name_len);
1366                         kmem_free(databuf, bufsz);
1367                         return (EFAULT);
1368                 }
1369         }
1370 
1371         mutex_enter(&registered_channel_mutex);
1372         if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1373             kdata.ps_op != SE_CLOSE_REGISTRATION) {
1374                 chan = get_channel(kchannel);
1375                 if (chan == NULL) {
1376                         mutex_exit(&registered_channel_mutex);
1377                         kmem_free(kchannel, kdata.ps_channel_name_len);
1378                         if (bufsz > 0)
1379                                 kmem_free(databuf, bufsz);
1380                         return (ENOENT);
1381                 }
1382         }
1383 
1384         switch (kdata.ps_op) {
1385         case SE_OPEN_REGISTRATION:
1386                 if (open_channel(kchannel) != 0) {
1387                         error = ENOMEM;
1388                         if (bufsz > 0)
1389                                 kmem_free(databuf, bufsz);
1390                         kmem_free(kchannel, kdata.ps_channel_name_len);
1391                 }
1392 
1393                 mutex_exit(&registered_channel_mutex);
1394                 return (error);
1395         case SE_CLOSE_REGISTRATION:
1396                 close_channel(kchannel);
1397                 break;
1398         case SE_BIND_REGISTRATION:
1399                 if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1400                         error = EBUSY;
1401                 break;
1402         case SE_UNBIND_REGISTRATION:
1403                 (void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1404                 break;
1405         case SE_REGISTER:
1406                 if (bufsz == 0) {
1407                         error = EINVAL;
1408                         break;
1409                 }
1410                 if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1411                         error = EINVAL;
1412                 break;
1413         case SE_UNREGISTER:
1414                 if (bufsz == 0) {
1415                         error = EINVAL;
1416                         break;
1417                 }
1418                 remove_class(chan, kdata.ps_id, databuf);
1419                 break;
1420         case SE_CLEANUP:
1421                 /* Cleanup the indicated subscriber or publisher */
1422                 release_id(chan, kdata.ps_type, kdata.ps_id);
1423                 break;
1424         case SE_GET_REGISTRATION:
1425                 error = get_registration(chan, databuf,
1426                     &kdata.ps_buflen, kdata.ps_id);
1427                 break;
1428         default:
1429                 error = ENOTSUP;
1430         }
1431 
1432         mutex_exit(&registered_channel_mutex);
1433 
1434         kmem_free(kchannel, kdata.ps_channel_name_len);
1435 
1436         if (bufsz > 0) {
1437                 if (copyout(databuf, udatabuf, bufsz) == -1)
1438                         error = EFAULT;
1439                 kmem_free(databuf, bufsz);
1440         }
1441 
1442         if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1443                 return (EFAULT);
1444 
1445         return (error);
1446 }
1447 
1448 /*
1449  * log_sysevent_copyout_data - Copyout event data to userland.
1450  *                      This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1451  *                      The buffer size is always sufficient.
1452  */
1453 int
1454 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1455 {
1456         int error = ENOENT;
1457         log_eventq_t *q;
1458         sysevent_t *ev;
1459         sysevent_id_t eid_copy;
1460 
1461         /*
1462          * Copy eid
1463          */
1464         if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1465                 return (EFAULT);
1466         }
1467 
1468         mutex_enter(&eventq_sent_mutex);
1469         q = log_eventq_sent;
1470 
1471         /*
1472          * Search for event buffer on the sent queue with matching
1473          * event identifier
1474          */
1475         while (q) {
1476                 ev = (sysevent_t *)&q->arg.buf;
1477 
1478                 if (SE_TIME(ev) != eid_copy.eid_ts ||
1479                     SE_SEQ(ev) != eid_copy.eid_seq) {
1480                         q = q->next;
1481                         continue;
1482                 }
1483 
1484                 if (ubuflen < SE_SIZE(ev)) {
1485                         error = EFAULT;
1486                         break;
1487                 }
1488                 if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1489                         error = EFAULT;
1490                         LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1491                             "0x%" PRIx64 " from queue: EFAULT\n",
1492                             eid->eid_seq));
1493                 } else {
1494                         error = 0;
1495                 }
1496                 break;
1497         }
1498 
1499         mutex_exit(&eventq_sent_mutex);
1500 
1501         return (error);
1502 }
1503 
1504 /*
1505  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1506  *                      by eid (must have already been sent).  Called from
1507  *                      modctl(MODEVENTS, MODEVENTS_FREEDATA).
1508  */
1509 int
1510 log_sysevent_free_data(sysevent_id_t *eid)
1511 {
1512         int error = ENOENT;
1513         sysevent_t *ev;
1514         log_eventq_t *q, *prev = NULL;
1515         sysevent_id_t eid_copy;
1516 
1517         /*
1518          * Copy eid
1519          */
1520         if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1521                 return (EFAULT);
1522         }
1523 
1524         mutex_enter(&eventq_sent_mutex);
1525         q = log_eventq_sent;
1526 
1527         /*
1528          * Look for the event to be freed on the sent queue.  Due to delayed
1529          * processing of the event, it may not be on the sent queue yet.
1530          * It is up to the user to retry the free operation to ensure that the
1531          * event is properly freed.
1532          */
1533         while (q) {
1534                 ev = (sysevent_t *)&q->arg.buf;
1535 
1536                 if (SE_TIME(ev) != eid_copy.eid_ts ||
1537                     SE_SEQ(ev) != eid_copy.eid_seq) {
1538                         prev = q;
1539                         q = q->next;
1540                         continue;
1541                 }
1542                 /*
1543                  * Take it out of log_eventq_sent and free it
1544                  */
1545                 if (prev) {
1546                         prev->next = q->next;
1547                 } else {
1548                         log_eventq_sent = q->next;
1549                 }
1550                 free_packed_event(ev);
1551                 error = 0;
1552                 break;
1553         }
1554 
1555         mutex_exit(&eventq_sent_mutex);
1556 
1557         return (error);
1558 }
1559 
1560 /*
1561  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1562  *                      create log_event_deliver thread or wake it up
1563  */
1564 /*ARGSUSED*/
1565 void
1566 log_sysevent_flushq(int cmd, uint_t flag)
1567 {
1568         mutex_enter(&eventq_head_mutex);
1569 
1570         /*
1571          * Start the event delivery thread
1572          * Mark the upcall status as active since we should
1573          * now be able to begin emptying the queue normally.
1574          */
1575         if (!async_thread) {
1576                 sysevent_upcall_status = 0;
1577                 sysevent_daemon_init = 1;
1578                 setup_ddi_poststartup();
1579                 async_thread = thread_create(NULL, 0, log_event_deliver,
1580                     NULL, 0, &p0, TS_RUN, minclsyspri);
1581         }
1582 
1583         log_event_delivery = LOGEVENT_DELIVERY_CONT;
1584         cv_signal(&log_event_cv);
1585         mutex_exit(&eventq_head_mutex);
1586 }
1587 
1588 /*
1589  * log_sysevent_filename - Called by syseventd via
1590  *                      modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1591  *                      to subsequently bind the event_door.
1592  *
1593  *                      This routine is called everytime syseventd (re)starts
1594  *                      and must therefore replay any events buffers that have
1595  *                      been sent but not freed.
1596  *
1597  *                      Event buffer delivery begins after a call to
1598  *                      log_sysevent_flushq().
1599  */
1600 int
1601 log_sysevent_filename(char *file)
1602 {
1603         mutex_enter(&event_door_mutex);
1604 
1605         (void) strlcpy(logevent_door_upcall_filename, file,
1606             sizeof (logevent_door_upcall_filename));
1607 
1608         /* Unbind old event door */
1609         if (event_door != NULL)
1610                 door_ki_rele(event_door);
1611         /* Establish door connection with user event daemon (syseventd) */
1612         if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1613                 event_door = NULL;
1614 
1615         mutex_exit(&event_door_mutex);
1616 
1617         /*
1618          * We are called when syseventd restarts. Move all sent, but
1619          * not committed events from log_eventq_sent to log_eventq_head.
1620          * Do it in proper order to maintain increasing event id.
1621          */
1622         mutex_enter(&eventq_head_mutex);
1623 
1624         mutex_enter(&eventq_sent_mutex);
1625         while (log_eventq_sent) {
1626                 log_eventq_t *tmp = log_eventq_sent->next;
1627                 log_eventq_sent->next = log_eventq_head;
1628                 if (log_eventq_head == NULL) {
1629                         ASSERT(log_eventq_cnt == 0);
1630                         log_eventq_tail = log_eventq_sent;
1631                         log_eventq_tail->next = NULL;
1632                 } else if (log_eventq_head == log_eventq_tail) {
1633                         ASSERT(log_eventq_cnt == 1);
1634                         ASSERT(log_eventq_head->next == NULL);
1635                         ASSERT(log_eventq_tail->next == NULL);
1636                 }
1637                 log_eventq_head = log_eventq_sent;
1638                 log_eventq_sent = tmp;
1639                 log_eventq_cnt++;
1640         }
1641         mutex_exit(&eventq_sent_mutex);
1642         mutex_exit(&eventq_head_mutex);
1643 
1644         return (0);
1645 }
1646 
1647 /*
1648  * queue_sysevent - queue an event buffer
1649  */
1650 static int
1651 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1652 {
1653         log_eventq_t *q;
1654 
1655         ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1656 
1657         DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1658 
1659 restart:
1660 
1661         /* Max Q size exceeded */
1662         mutex_enter(&event_qfull_mutex);
1663         if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1664                 /*
1665                  * If queue full and transport down, return no transport
1666                  */
1667                 if (sysevent_upcall_status != 0) {
1668                         mutex_exit(&event_qfull_mutex);
1669                         free_packed_event(ev);
1670                         eid->eid_seq = UINT64_C(0);
1671                         eid->eid_ts = INT64_C(0);
1672                         return (SE_NO_TRANSPORT);
1673                 }
1674                 if (flag == SE_NOSLEEP) {
1675                         mutex_exit(&event_qfull_mutex);
1676                         free_packed_event(ev);
1677                         eid->eid_seq = UINT64_C(0);
1678                         eid->eid_ts = INT64_C(0);
1679                         return (SE_EQSIZE);
1680                 }
1681                 event_qfull_blocked++;
1682                 cv_wait(&event_qfull_cv, &event_qfull_mutex);
1683                 event_qfull_blocked--;
1684                 mutex_exit(&event_qfull_mutex);
1685                 goto restart;
1686         }
1687         mutex_exit(&event_qfull_mutex);
1688 
1689         mutex_enter(&eventq_head_mutex);
1690 
1691         /* Time stamp and assign ID */
1692         SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1693             (uint64_t)1);
1694         SE_TIME(ev) = eid->eid_ts = gethrtime();
1695 
1696         LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1697             SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1698 
1699         /*
1700          * Put event on eventq
1701          */
1702         q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1703         q->next = NULL;
1704         if (log_eventq_head == NULL) {
1705                 ASSERT(log_eventq_cnt == 0);
1706                 log_eventq_head = q;
1707                 log_eventq_tail = q;
1708         } else {
1709                 if (log_eventq_head == log_eventq_tail) {
1710                         ASSERT(log_eventq_cnt == 1);
1711                         ASSERT(log_eventq_head->next == NULL);
1712                         ASSERT(log_eventq_tail->next == NULL);
1713                 }
1714                 log_eventq_tail->next = q;
1715                 log_eventq_tail = q;
1716         }
1717         log_eventq_cnt++;
1718 
1719         /* Signal event delivery thread */
1720         if (log_eventq_cnt == 1) {
1721                 cv_signal(&log_event_cv);
1722         }
1723         mutex_exit(&eventq_head_mutex);
1724 
1725         return (0);
1726 }
1727 
1728 /*
1729  * log_sysevent - kernel system event logger.
1730  *
1731  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1732  * maximum event queue size will be exceeded
1733  * Returns 0 for successfully queued event buffer
1734  */
1735 int
1736 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1737 {
1738         sysevent_t *ev_copy;
1739         int rval;
1740 
1741         ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1742         ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1743 
1744         ev_copy = se_repack(ev, flag);
1745         if (ev_copy == NULL) {
1746                 ASSERT(flag == SE_NOSLEEP);
1747                 return (SE_ENOMEM);
1748         }
1749         rval = queue_sysevent(ev_copy, eid, flag);
1750         ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1751             rval == SE_NO_TRANSPORT);
1752         ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1753         return (rval);
1754 }
1755 
1756 /*
1757  * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi.
1758  * This interface is needed to pass device link names to the lofi driver,
1759  * to be returned via ioctl() to the lofiadm command.
1760  * The problem is, if lofiadm is executed in local zone, there is no
1761  * mechanism to announce the device name from the /dev tree back to lofiadm,
1762  * as sysevents are not accessible from local zone and devfsadmd is only
1763  * running in global zone.
1764  *
1765  * Delayed/missed events are not fatal for lofi, as the device name returned
1766  * to lofiadm is for information and can be re-queried with listing
1767  * mappings with lofiadm command.
1768  *
1769  * Once we have a better method, this interface should be reworked.
1770  */
1771 static void
1772 notify_lofi(sysevent_t *ev)
1773 {
1774         nvlist_t *nvlist;
1775         char name[10], *class, *driver;
1776         int32_t instance;
1777 
1778         class = sysevent_get_class_name(ev);
1779         if ((strcmp(EC_DEV_ADD, class) != 0) &&
1780             (strcmp(EC_DEV_REMOVE, class) != 0)) {
1781                 return;
1782         }
1783 
1784         (void) sysevent_get_attr_list(ev, &nvlist);
1785         driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME);
1786         instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE);
1787 
1788         /* We are only interested about lofi. */
1789         if (strcmp(driver, "lofi") != 0) {
1790                 fnvlist_free(nvlist);
1791                 return;
1792         }
1793 
1794         /*
1795          * insert or remove device info, then announce the change
1796          * via cv_broadcast.
1797          */
1798         (void) snprintf(name, sizeof (name), "%d", instance);
1799         mutex_enter(&lofi_devlink_cache.ln_lock);
1800         if (strcmp(class, EC_DEV_ADD) == 0) {
1801                 fnvlist_add_nvlist(lofi_devlink_cache.ln_data, name, nvlist);
1802         } else {
1803                 /* Can not use fnvlist_remove() as we can get ENOENT. */
1804                 (void) nvlist_remove_all(lofi_devlink_cache.ln_data, name);
1805         }
1806         cv_broadcast(&lofi_devlink_cache.ln_cv);
1807         mutex_exit(&lofi_devlink_cache.ln_lock);
1808 
1809         fnvlist_free(nvlist);
1810 }
1811 
1812 /*
1813  * log_usr_sysevent - user system event logger
1814  *                      Private to devfsadm and accessible only via
1815  *                      modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1816  */
1817 int
1818 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1819 {
1820         int ret, copy_sz;
1821         sysevent_t *ev_copy;
1822         sysevent_id_t new_eid;
1823         log_eventq_t *qcopy;
1824 
1825         copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1826             offsetof(log_event_upcall_arg_t, buf);
1827         qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1828         ev_copy = (sysevent_t *)&qcopy->arg.buf;
1829 
1830         /*
1831          * Copy event
1832          */
1833         if (copyin(ev, ev_copy, ev_size) == -1) {
1834                 kmem_free(qcopy, copy_sz);
1835                 return (EFAULT);
1836         }
1837 
1838         notify_lofi(ev_copy);
1839 
1840         if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1841                 if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1842                         return (EAGAIN);
1843                 else
1844                         return (EIO);
1845         }
1846 
1847         if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1848                 return (EFAULT);
1849         }
1850 
1851         return (0);
1852 }
1853 
1854 
1855 
1856 int
1857 ddi_log_sysevent(
1858         dev_info_t              *dip,
1859         char                    *vendor,
1860         char                    *class,
1861         char                    *subclass,
1862         nvlist_t                *attr_list,
1863         sysevent_id_t           *eidp,
1864         int                     sleep_flag)
1865 {
1866         sysevent_attr_list_t    *list = (sysevent_attr_list_t *)attr_list;
1867         char                    pubstr[32];
1868         sysevent_t              *event;
1869         sysevent_id_t           eid;
1870         const char              *drvname;
1871         char                    *publisher;
1872         int                     se_flag;
1873         int                     rval;
1874         int                     n;
1875 
1876         if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1877                 cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1878                     "event from interrupt context with sleep semantics\n",
1879                     ddi_driver_name(dip), ddi_get_instance(dip));
1880                 return (DDI_ECONTEXT);
1881         }
1882 
1883         drvname = ddi_driver_name(dip);
1884         n = strlen(vendor) + strlen(drvname) + 7;
1885         if (n < sizeof (pubstr)) {
1886                 publisher = pubstr;
1887         } else {
1888                 publisher = kmem_alloc(n,
1889                     (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1890                 if (publisher == NULL) {
1891                         return (DDI_ENOMEM);
1892                 }
1893         }
1894         (void) strcpy(publisher, vendor);
1895         (void) strcat(publisher, ":kern:");
1896         (void) strcat(publisher, drvname);
1897 
1898         se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1899         event = sysevent_alloc(class, subclass, publisher, se_flag);
1900 
1901         if (publisher != pubstr) {
1902                 kmem_free(publisher, n);
1903         }
1904 
1905         if (event == NULL) {
1906                 return (DDI_ENOMEM);
1907         }
1908 
1909         if (list) {
1910                 (void) sysevent_attach_attributes(event, list);
1911         }
1912 
1913         rval = log_sysevent(event, se_flag, &eid);
1914         if (list) {
1915                 sysevent_detach_attributes(event);
1916         }
1917         sysevent_free(event);
1918         if (rval == 0) {
1919                 if (eidp) {
1920                         eidp->eid_seq = eid.eid_seq;
1921                         eidp->eid_ts = eid.eid_ts;
1922                 }
1923                 return (DDI_SUCCESS);
1924         }
1925         if (rval == SE_NO_TRANSPORT)
1926                 return (DDI_ETRANSPORT);
1927 
1928         ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1929         return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1930 }
1931 
1932 uint64_t
1933 log_sysevent_new_id(void)
1934 {
1935         return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1936 }