1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2012 Milan Jurik. All rights reserved.
  26  * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/socket.h>
  31 #include <sys/list.h>
  32 #include <sys/stropts.h>
  33 #include <sys/siginfo.h>
  34 #include <sys/wait.h>
  35 #include <arpa/inet.h>
  36 #include <netinet/in.h>
  37 #include <stdlib.h>
  38 #include <stdio.h>
  39 #include <strings.h>
  40 #include <stddef.h>
  41 #include <unistd.h>
  42 #include <libilb.h>
  43 #include <port.h>
  44 #include <time.h>
  45 #include <signal.h>
  46 #include <assert.h>
  47 #include <errno.h>
  48 #include <spawn.h>
  49 #include <fcntl.h>
  50 #include <limits.h>
  51 #include "libilb_impl.h"
  52 #include "ilbd.h"
  53 
  54 /* Global list of HC objects */
  55 list_t ilbd_hc_list;
  56 
  57 /* Timer queue for all hc related timers. */
  58 static iu_tq_t *ilbd_hc_timer_q;
  59 
  60 /* Indicate whether the timer needs to be updated */
  61 static boolean_t hc_timer_restarted;
  62 
  63 static void ilbd_hc_probe_timer(iu_tq_t *, void *);
  64 static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *);
  65 static boolean_t ilbd_run_probe(ilbd_hc_srv_t *);
  66 
  67 #define MAX(a, b)       ((a) > (b) ? (a) : (b))
  68 
/*
 * Number of arguments passed to a probe.  argv[0] is the path name of
 * the probe.
 */
  73 #define HC_PROBE_ARGC   8
  74 
  75 /*
  76  * Max number of characters to be read from the output of a probe.  It
  77  * is long enough to read in a 64 bit integer.
  78  */
  79 #define HC_MAX_PROBE_OUTPUT     24
  80 
  81 void
  82 i_ilbd_setup_hc_list(void)
  83 {
  84         list_create(&ilbd_hc_list, sizeof (ilbd_hc_t),
  85             offsetof(ilbd_hc_t, ihc_link));
  86 }
  87 
  88 /*
  89  * Given a hc object name, return a pointer to hc object if found.
  90  */
  91 ilbd_hc_t *
  92 ilbd_get_hc(const char *name)
  93 {
  94         ilbd_hc_t *hc;
  95 
  96         for (hc = list_head(&ilbd_hc_list); hc != NULL;
  97             hc = list_next(&ilbd_hc_list, hc)) {
  98                 if (strcasecmp(hc->ihc_name, name) == 0)
  99                         return (hc);
 100         }
 101         return (NULL);
 102 }
 103 
 104 /*
 105  * Generates an audit record for create-healthcheck,
 106  * delete-healtcheck subcommands.
 107  */
 108 static void
 109 ilbd_audit_hc_event(const char *audit_hcname,
 110     const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd,
 111     ilb_status_t rc, ucred_t *ucredp)
 112 {
 113         adt_session_data_t      *ah;
 114         adt_event_data_t        *event;
 115         au_event_t      flag;
 116         int     audit_error;
 117 
 118         if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC))  {
 119                 /*
 120                  * we came here from the path where ilbd incorporates
 121                  * the configuration that is listed in SCF:
 122                  * i_ilbd_read_config->ilbd_walk_hc_pgs->
 123                  *   ->ilbd_scf_instance_walk_pg->ilbd_create_hc
 124                  * We skip auditing in that case
 125                  */
 126                 logdebug("ilbd_audit_hc_event: skipping auditing");
 127                 return;
 128         }
 129 
 130         if (adt_start_session(&ah, NULL, 0) != 0) {
 131                 logerr("ilbd_audit_hc_event: adt_start_session failed");
 132                 exit(EXIT_FAILURE);
 133         }
 134         if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
 135                 (void) adt_end_session(ah);
 136                 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed");
 137                 exit(EXIT_FAILURE);
 138         }
 139         if (cmd == ILBD_CREATE_HC)
 140                 flag = ADT_ilb_create_healthcheck;
 141         else if (cmd == ILBD_DESTROY_HC)
 142                 flag = ADT_ilb_delete_healthcheck;
 143 
 144         if ((event = adt_alloc_event(ah, flag)) == NULL) {
 145                 logerr("ilbd_audit_hc_event: adt_alloc_event failed");
 146                 exit(EXIT_FAILURE);
 147         }
 148         (void) memset((char *)event, 0, sizeof (adt_event_data_t));
 149 
 150         switch (cmd) {
 151         case ILBD_CREATE_HC:
 152                 event->adt_ilb_create_healthcheck.auth_used =
 153                     NET_ILB_CONFIG_AUTH;
 154                 event->adt_ilb_create_healthcheck.hc_test =
 155                     (char *)audit_hcinfo->hci_test;
 156                 event->adt_ilb_create_healthcheck.hc_name =
 157                     (char *)audit_hcinfo->hci_name;
 158 
 159                 /*
 160                  * If the value 0 is stored, the default values are
 161                  * set in the kernel. User land does not know about them
 162                  * So if the user does not specify them, audit record
 163                  * will show them as 0
 164                  */
 165                 event->adt_ilb_create_healthcheck.hc_timeout =
 166                     audit_hcinfo->hci_timeout;
 167                 event->adt_ilb_create_healthcheck.hc_count =
 168                     audit_hcinfo->hci_count;
 169                 event->adt_ilb_create_healthcheck.hc_interval =
 170                     audit_hcinfo->hci_interval;
 171                 break;
 172         case ILBD_DESTROY_HC:
 173                 event->adt_ilb_delete_healthcheck.auth_used =
 174                     NET_ILB_CONFIG_AUTH;
 175                 event->adt_ilb_delete_healthcheck.hc_name =
 176                     (char *)audit_hcname;
 177                 break;
 178         }
 179 
 180         /* Fill in success/failure */
 181         if (rc == ILB_STATUS_OK) {
 182                 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
 183                         logerr("ilbd_audit_hc_event: adt_put_event failed");
 184                         exit(EXIT_FAILURE);
 185                 }
 186         } else {
 187                 audit_error = ilberror2auditerror(rc);
 188                 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
 189                         logerr("ilbd_audit_hc_event: adt_put_event failed");
 190                         exit(EXIT_FAILURE);
 191                 }
 192         }
 193         adt_free_event(event);
 194         (void) adt_end_session(ah);
 195 }
 196 
 197 /*
 198  * Given the ilb_hc_info_t passed in (from the libilb), create a hc object
 199  * in ilbd.  The parameter ev_port is not used, refer to comments of
 200  * ilbd_create_sg() in ilbd_sg.c
 201  */
 202 /* ARGSUSED */
 203 ilb_status_t
 204 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port,
 205     const struct passwd *ps, ucred_t *ucredp)
 206 {
 207         ilbd_hc_t *hc;
 208         ilb_status_t ret = ILB_STATUS_OK;
 209 
 210         /*
 211          * ps == NULL is from the daemon when it starts and load configuration
 212          * ps != NULL is from client.
 213          */
 214         if (ps != NULL) {
 215                 ret = ilbd_check_client_config_auth(ps);
 216                 if (ret != ILB_STATUS_OK) {
 217                         ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 218                             ret, ucredp);
 219                         return (ret);
 220                 }
 221         }
 222 
 223         if (hc_info->hci_name[0] == '\0') {
 224                 logdebug("ilbd_create_hc: missing healthcheck info");
 225                 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 226                     ILB_STATUS_ENOHCINFO, ucredp);
 227                 return (ILB_STATUS_ENOHCINFO);
 228         }
 229 
 230         hc = ilbd_get_hc(hc_info->hci_name);
 231         if (hc != NULL) {
 232                 logdebug("ilbd_create_hc: healthcheck name %s already"
 233                     " exists", hc_info->hci_name);
 234                 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 235                     ILB_STATUS_EEXIST, ucredp);
 236                 return (ILB_STATUS_EEXIST);
 237         }
 238 
 239         /*
 240          * Sanity check on user supplied probe.  The given path name
 241          * must be a full path name (starts with '/') and is
 242          * executable.
 243          */
 244         if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 &&
 245             strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 &&
 246             strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 &&
 247             (hc_info->hci_test[0] != '/' ||
 248             access(hc_info->hci_test, X_OK) == -1)) {
 249                 if (errno == ENOENT) {
 250                         logdebug("ilbd_create_hc: user script %s doesn't "
 251                             "exist", hc_info->hci_test);
 252                         ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 253                             ILB_STATUS_ENOENT, ucredp);
 254                         return (ILB_STATUS_ENOENT);
 255                 } else {
 256                         logdebug("ilbd_create_hc: user script %s is "
 257                             "invalid", hc_info->hci_test);
 258                         ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 259                             ILB_STATUS_EINVAL, ucredp);
 260                         return (ILB_STATUS_EINVAL);
 261                 }
 262         }
 263 
 264         /* Create and add the hc object */
 265         hc = calloc(1, sizeof (ilbd_hc_t));
 266         if (hc == NULL) {
 267                 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 268                     ILB_STATUS_ENOMEM, ucredp);
 269                 return (ILB_STATUS_ENOMEM);
 270         }
 271         (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t));
 272         if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0)
 273                 hc->ihc_test_type = ILBD_HC_TCP;
 274         else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0)
 275                 hc->ihc_test_type = ILBD_HC_UDP;
 276         else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0)
 277                 hc->ihc_test_type = ILBD_HC_PING;
 278         else
 279                 hc->ihc_test_type = ILBD_HC_USER;
 280         list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t),
 281             offsetof(ilbd_hc_rule_t, hcr_link));
 282 
 283         /* Update SCF */
 284         if (ps != NULL) {
 285                 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) !=
 286                     ILB_STATUS_OK) {
 287                         ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
 288                             ret, ucredp);
 289                         list_destroy(&hc->ihc_rules);
 290                         free(hc);
 291                         return (ret);
 292                 }
 293         }
 294 
 295         /* Everything is fine, now add it to the global list. */
 296         list_insert_tail(&ilbd_hc_list, hc);
 297         ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp);
 298         return (ret);
 299 }
 300 
 301 /*
 302  * Given a name of a hc object, destroy it.
 303  */
 304 ilb_status_t
 305 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps,
 306     ucred_t *ucredp)
 307 {
 308         ilb_status_t ret;
 309         ilbd_hc_t *hc;
 310 
 311         /*
 312          * No need to check ps == NULL, daemon won't call any destroy func
 313          * at start up.
 314          */
 315         ret = ilbd_check_client_config_auth(ps);
 316         if (ret != ILB_STATUS_OK) {
 317                 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
 318                     ret, ucredp);
 319                 return (ret);
 320         }
 321 
 322         hc = ilbd_get_hc(hc_name);
 323         if (hc == NULL) {
 324                 logdebug("ilbd_destroy_hc: healthcheck %s does not exist",
 325                     hc_name);
 326                 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
 327                     ILB_STATUS_ENOENT, ucredp);
 328                 return (ILB_STATUS_ENOENT);
 329         }
 330 
 331         /* If hc is in use, cannot delete it */
 332         if (hc->ihc_rule_cnt > 0) {
 333                 logdebug("ilbd_destroy_hc: healthcheck %s is associated"
 334                     " with a rule - cannot remove", hc_name);
 335                 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
 336                     ILB_STATUS_INUSE, ucredp);
 337                 return (ILB_STATUS_INUSE);
 338         }
 339 
 340         if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) !=
 341             ILB_STATUS_OK) {
 342                 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s "
 343                     "property group", hc_name);
 344                 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
 345                     ret, ucredp);
 346                 return (ret);
 347         }
 348 
 349         list_remove(&ilbd_hc_list, hc);
 350         list_destroy(&hc->ihc_rules);
 351         free(hc);
 352         ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp);
 353         return (ret);
 354 }
 355 
 356 /*
 357  * Given a hc object name, return its information.  Used by libilb to
 358  * get hc info.
 359  */
 360 ilb_status_t
 361 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz)
 362 {
 363         ilbd_hc_t       *hc;
 364         ilb_hc_info_t   *hc_info;
 365         ilb_comm_t      *ic = (ilb_comm_t *)rbuf;
 366 
 367         hc = ilbd_get_hc(hc_name);
 368         if (hc == NULL) {
 369                 logdebug("%s: healthcheck %s does not exist", __func__,
 370                     hc_name);
 371                 return (ILB_STATUS_ENOENT);
 372         }
 373         ilbd_reply_ok(rbuf, rbufsz);
 374         hc_info = (ilb_hc_info_t *)&ic->ic_data;
 375 
 376         (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name));
 377         (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test));
 378         hc_info->hci_timeout = hc->ihc_timeout;
 379         hc_info->hci_count = hc->ihc_count;
 380         hc_info->hci_interval = hc->ihc_interval;
 381         hc_info->hci_def_ping = hc->ihc_def_ping;
 382 
 383         *rbufsz += sizeof (ilb_hc_info_t);
 384 
 385         return (ILB_STATUS_OK);
 386 }
 387 
 388 static void
 389 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule,
 390     const char *rulename)
 391 {
 392         ilbd_hc_srv_t           *tmp_srv;
 393         ilb_hc_srv_t            *dst_srv;
 394         ilb_hc_rule_srv_t       *srvs;
 395         size_t                  tmp_rbufsz;
 396         int                     i;
 397 
 398         tmp_rbufsz = *rbufsz;
 399         /* Set up the reply buffer.  rbufsz will be set to the new size. */
 400         ilbd_reply_ok(rbuf, rbufsz);
 401 
 402         /* Calculate how much space is left for holding server info. */
 403         *rbufsz += sizeof (ilb_hc_rule_srv_t);
 404         tmp_rbufsz -= *rbufsz;
 405 
 406         srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
 407 
 408         tmp_srv = list_head(&hc_rule->hcr_servers);
 409         for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) {
 410                 dst_srv = &srvs->rs_srvs[i];
 411 
 412                 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ);
 413                 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID,
 414                     ILB_NAMESZ);
 415                 (void) strlcpy(dst_srv->hcs_hc_name,
 416                     tmp_srv->shc_hc->ihc_name, ILB_NAMESZ);
 417                 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr;
 418                 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt;
 419                 dst_srv->hcs_status = tmp_srv->shc_status;
 420                 dst_srv->hcs_rtt = tmp_srv->shc_rtt;
 421                 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime;
 422                 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime;
 423 
 424                 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv);
 425                 tmp_rbufsz -= sizeof (*dst_srv);
 426         }
 427         srvs->rs_num_srvs = i;
 428         *rbufsz += i * sizeof (*dst_srv);
 429 }
 430 
 431 /*
 432  * Given a rule name, return the hc status of its servers.
 433  */
 434 ilb_status_t
 435 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz)
 436 {
 437         ilbd_hc_t       *hc;
 438         ilbd_hc_rule_t  *hc_rule;
 439 
 440         for (hc = list_head(&ilbd_hc_list); hc != NULL;
 441             hc = list_next(&ilbd_hc_list, hc)) {
 442                 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
 443                     hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
 444                         if (strcasecmp(hc_rule->hcr_rule->irl_name,
 445                             rulename) != 0) {
 446                                 continue;
 447                         }
 448                         ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename);
 449                         return (ILB_STATUS_OK);
 450                 }
 451         }
 452         return (ILB_STATUS_RULE_NO_HC);
 453 }
 454 
 455 /*
 456  * Initialize the hc timer and associate the notification of timeout to
 457  * the given event port.
 458  */
 459 void
 460 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj)
 461 {
 462         struct sigevent sigev;
 463         port_notify_t notify;
 464 
 465         if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) {
 466                 logerr("%s: cannot create hc timer queue", __func__);
 467                 exit(EXIT_FAILURE);
 468         }
 469         hc_timer_restarted = B_FALSE;
 470 
 471         ev_obj->ev = ILBD_EVENT_TIMER;
 472         ev_obj->timerid = -1;
 473 
 474         notify.portnfy_port = ev_port;
 475         notify.portnfy_user = ev_obj;
 476         sigev.sigev_notify = SIGEV_PORT;
 477         sigev.sigev_value.sival_ptr = &notify;
 478         if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) {
 479                 logerr("%s: cannot create timer", __func__);
 480                 exit(EXIT_FAILURE);
 481         }
 482 }
 483 
 484 /*
 485  * HC timeout handler.
 486  */
 487 void
 488 ilbd_hc_timeout(void)
 489 {
 490         (void) iu_expire_timers(ilbd_hc_timer_q);
 491         hc_timer_restarted = B_TRUE;
 492 }
 493 
 494 /*
 495  * Set up the timer to fire at the earliest timeout.
 496  */
 497 void
 498 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj)
 499 {
 500         itimerspec_t itimeout;
 501         int timeout;
 502 
 503         /*
 504          * There is no change on the timer list, so no need to set up the
 505          * timer again.
 506          */
 507         if (!hc_timer_restarted)
 508                 return;
 509 
 510 restart:
 511         if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) {
 512                 hc_timer_restarted = B_FALSE;
 513                 return;
 514         } else if (timeout == 0) {
 515                 /*
 516                  * Handle the timeout immediately.  After that (clearing all
 517                  * the expired timers), check to  see if there are still
 518                  * timers running.  If yes, start them.
 519                  */
 520                 (void) iu_expire_timers(ilbd_hc_timer_q);
 521                 goto restart;
 522         }
 523 
 524         itimeout.it_value.tv_sec = timeout / MILLISEC + 1;
 525         itimeout.it_value.tv_nsec = 0;
 526         itimeout.it_interval.tv_sec = 0;
 527         itimeout.it_interval.tv_nsec = 0;
 528 
 529         /*
 530          * Failure to set a timeout is "OK" since hopefully there will be
 531          * other events and timer_settime() will be called again.  So
 532          * we will only miss some timeouts.  But in the worst case, no event
 533          * will happen and ilbd will get stuck...
 534          */
 535         if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1)
 536                 logerr("%s: cannot set timer", __func__);
 537         hc_timer_restarted = B_FALSE;
 538 }
 539 
 540 /*
 541  * Kill the probe process of a server.
 542  */
 543 static void
 544 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv)
 545 {
 546         /*
 547          * First dissociate the fd from the event port.  It should not
 548          * fail.
 549          */
 550         if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD,
 551             srv->shc_child_fd) != 0) {
 552                 logdebug("%s: port_dissociate: %s", __func__, strerror(errno));
 553         }
 554         (void) close(srv->shc_child_fd);
 555         free(srv->shc_ev);
 556         srv->shc_ev = NULL;
 557 
 558         /* Then kill the probe process. */
 559         if (kill(srv->shc_child_pid, SIGKILL) != 0) {
 560                 logerr("%s: rule %s server %s: %s", __func__,
 561                     srv->shc_hc_rule->hcr_rule->irl_name,
 562                     srv->shc_sg_srv->sgs_srvID, strerror(errno));
 563         }
 564         /* Should not fail... */
 565         if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) {
 566                 logdebug("%s: waitpid: rule %s server %s", __func__,
 567                     srv->shc_hc_rule->hcr_rule->irl_name,
 568                     srv->shc_sg_srv->sgs_srvID);
 569         }
 570         srv->shc_child_pid = 0;
 571 }
 572 
 573 /*
 574  * Disable the server, either because the server is dead or because a timer
 575  * cannot be started for this server.  Note that this only affects the
 576  * transient configuration, meaning only in memory.  The persistent
 577  * configuration is not affected.
 578  */
 579 static void
 580 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv)
 581 {
 582         srv->shc_status = ILB_HCS_DISABLED;
 583 
 584         /* Disable the server in kernel. */
 585         if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
 586             srv->shc_hc_rule->hcr_rule->irl_name,
 587             stat_declare_srv_dead) != ILB_STATUS_OK) {
 588                 logerr("%s: cannot disable server in kernel: rule %s "
 589                     "server %s", __func__,
 590                     srv->shc_hc_rule->hcr_rule->irl_name,
 591                     srv->shc_sg_srv->sgs_srvID);
 592         }
 593 }
 594 
 595 /*
 596  * A probe fails, set the state of the server.
 597  */
 598 static void
 599 ilbd_set_fail_state(ilbd_hc_srv_t *srv)
 600 {
 601         if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) {
 602                 /* Probe again */
 603                 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
 604                 return;
 605         }
 606 
 607         logdebug("%s: rule %s server %s fails %u", __func__,
 608             srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID,
 609             srv->shc_fail_cnt);
 610 
 611         /*
 612          * If this is a ping test, mark the server as
 613          * unreachable instead of dead.
 614          */
 615         if (srv->shc_hc->ihc_test_type == ILBD_HC_PING ||
 616             srv->shc_state == ilbd_hc_def_pinging) {
 617                 srv->shc_status = ILB_HCS_UNREACH;
 618         } else {
 619                 srv->shc_status = ILB_HCS_DEAD;
 620         }
 621 
 622         /* Disable the server in kernel. */
 623         if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
 624             srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) !=
 625             ILB_STATUS_OK) {
 626                 logerr("%s: cannot disable server in kernel: rule %s "
 627                     "server %s", __func__,
 628                     srv->shc_hc_rule->hcr_rule->irl_name,
 629                     srv->shc_sg_srv->sgs_srvID);
 630         }
 631 
 632         /* Still keep probing in case the server is alive again. */
 633         if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
 634                 /* Only thing to do is to disable the server... */
 635                 logerr("%s: cannot restart timer: rule %s server %s", __func__,
 636                     srv->shc_hc_rule->hcr_rule->irl_name,
 637                     srv->shc_sg_srv->sgs_srvID);
 638                 srv->shc_status = ILB_HCS_DISABLED;
 639         }
 640 }
 641 
 642 /*
 643  * A probe process has not returned for the ihc_timeout period, we should
 644  * kill it.  This function is the handler of this.
 645  */
 646 /* ARGSUSED */
 647 static void
 648 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg)
 649 {
 650         ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
 651 
 652         ilbd_hc_kill_probe(srv);
 653         ilbd_set_fail_state(srv);
 654 }
 655 
 656 /*
 657  * Probe timeout handler.  Send out the appropriate probe.
 658  */
 659 /* ARGSUSED */
 660 static void
 661 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg)
 662 {
 663         ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
 664 
 665         /*
 666          * If starting the probe fails, just pretend that the timeout has
 667          * extended.
 668          */
 669         if (!ilbd_run_probe(srv)) {
 670                 /*
 671                  * If we cannot restart the timer, the only thing we can do
 672                  * is to disable this server.  Hopefully the sys admin will
 673                  * notice this and enable this server again later.
 674                  */
 675                 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
 676                         logerr("%s: cannot restart timer: rule %s server %s, "
 677                             "disabling it", __func__,
 678                             srv->shc_hc_rule->hcr_rule->irl_name,
 679                             srv->shc_sg_srv->sgs_srvID);
 680                         ilbd_mark_server_disabled(srv);
 681                 }
 682                 return;
 683         }
 684 
 685         /*
 686          * Similar to above, if kill timer cannot be started, disable the
 687          * server.
 688          */
 689         if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q,
 690             srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) {
 691                 logerr("%s: cannot start kill timer: rule %s server %s, "
 692                     "disabling it", __func__,
 693                     srv->shc_hc_rule->hcr_rule->irl_name,
 694                     srv->shc_sg_srv->sgs_srvID);
 695                 ilbd_mark_server_disabled(srv);
 696         }
 697         hc_timer_restarted = B_TRUE;
 698 }
 699 
 700 /* Restart the periodic timer for a given server. */
 701 static ilb_status_t
 702 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv)
 703 {
 704         int timeout;
 705 
 706         /* Don't allow the timeout interval to be less than 1s */
 707         timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() %
 708             (hc->ihc_interval + 1)), 1);
 709 
 710         /*
 711          * If the probe is actually a ping probe, there is no need to
 712          * do default pinging.  Just skip the step.
 713          */
 714         if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING)
 715                 srv->shc_state = ilbd_hc_def_pinging;
 716         else
 717                 srv->shc_state = ilbd_hc_probing;
 718         srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout,
 719             ilbd_hc_probe_timer, srv);
 720 
 721         if (srv->shc_tid == -1)
 722                 return (ILB_STATUS_TIMER);
 723         srv->shc_lasttime = time(NULL);
 724         srv->shc_nexttime = time(NULL) + timeout;
 725 
 726         hc_timer_restarted = B_TRUE;
 727         return (ILB_STATUS_OK);
 728 }
 729 
 730 /* Helper routine to associate a server with its hc object. */
 731 static ilb_status_t
 732 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule,
 733     const ilb_sg_srv_t *srv, int ev_port)
 734 {
 735         ilbd_hc_srv_t *new_srv;
 736         ilb_status_t ret;
 737 
 738         if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL)
 739                 return (ILB_STATUS_ENOMEM);
 740         new_srv->shc_hc = hc;
 741         new_srv->shc_hc_rule = hc_rule;
 742         new_srv->shc_sg_srv = srv;
 743         new_srv->shc_ev_port = ev_port;
 744         new_srv->shc_tid = -1;
 745         new_srv->shc_nexttime = time(NULL);
 746         new_srv->shc_lasttime = new_srv->shc_nexttime;
 747 
 748         if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) &&
 749             ILB_IS_SRV_ENABLED(srv->sgs_flags)) {
 750                 new_srv->shc_status = ILB_HCS_UNINIT;
 751                 ret = ilbd_hc_restart_timer(hc, new_srv);
 752                 if (ret != ILB_STATUS_OK) {
 753                         free(new_srv);
 754                         return (ret);
 755                 }
 756         } else {
 757                 new_srv->shc_status = ILB_HCS_DISABLED;
 758         }
 759 
 760         list_insert_tail(&hc_rule->hcr_servers, new_srv);
 761         return (ILB_STATUS_OK);
 762 }
 763 
 764 /* Handy macro to cancel a server's timer. */
 765 #define HC_CANCEL_TIMER(srv)                                            \
 766 {                                                                       \
 767         void *arg;                                                      \
 768         int ret;                                                        \
 769         if ((srv)->shc_tid != -1) {                                  \
 770                 ret = iu_cancel_timer(ilbd_hc_timer_q, (srv)->shc_tid, &arg); \
 771                 (srv)->shc_tid = -1;                                 \
 772                 assert(ret == 1);                                       \
 773                 assert(arg == (srv));                                   \
 774         }                                                               \
 775         hc_timer_restarted = B_TRUE;                                    \
 776 }
 777 
 778 /* Helper routine to dissociate a server from its hc object. */
 779 static ilb_status_t
 780 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv)
 781 {
 782         ilbd_hc_srv_t *tmp_srv;
 783 
 784         for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
 785             tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
 786                 if (tmp_srv->shc_sg_srv == srv) {
 787                         list_remove(&hc_rule->hcr_servers, tmp_srv);
 788                         HC_CANCEL_TIMER(tmp_srv);
 789                         if (tmp_srv->shc_child_pid != 0)
 790                                 ilbd_hc_kill_probe(tmp_srv);
 791                         free(tmp_srv);
 792                         return (ILB_STATUS_OK);
 793                 }
 794         }
 795         return (ILB_STATUS_ENOENT);
 796 }
 797 
 798 /* Helper routine to dissociate all servers of a rule from its hc object. */
 799 static void
 800 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule)
 801 {
 802         ilbd_hc_srv_t *srv;
 803 
 804         while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) {
 805                 HC_CANCEL_TIMER(srv);
 806                 if (srv->shc_child_pid != 0)
 807                         ilbd_hc_kill_probe(srv);
 808                 free(srv);
 809         }
 810 }
 811 
 812 /* Associate a rule with its hc object. */
 813 ilb_status_t
 814 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port)
 815 {
 816         ilbd_hc_t       *hc;
 817         ilbd_hc_rule_t  *hc_rule;
 818         ilb_status_t    ret;
 819         ilbd_sg_t       *sg;
 820         ilbd_srv_t      *ilbd_srv;
 821 
 822         /* The rule is assumed to be initialized appropriately. */
 823         if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
 824                 logdebug("ilbd_hc_associate_rule: healthcheck %s does not "
 825                     "exist", rule->irl_hcname);
 826                 return (ILB_STATUS_ENOHCINFO);
 827         }
 828         if ((hc->ihc_test_type == ILBD_HC_TCP &&
 829             rule->irl_proto != IPPROTO_TCP) ||
 830             (hc->ihc_test_type == ILBD_HC_UDP &&
 831             rule->irl_proto != IPPROTO_UDP)) {
 832                 return (ILB_STATUS_RULE_HC_MISMATCH);
 833         }
 834         if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) {
 835                 logdebug("ilbd_hc_associate_rule: out of memory");
 836                 return (ILB_STATUS_ENOMEM);
 837         }
 838 
 839         hc_rule->hcr_rule = rule;
 840         list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t),
 841             offsetof(ilbd_hc_srv_t, shc_srv_link));
 842 
 843         /* Add all the servers. */
 844         sg = rule->irl_sg;
 845         for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL;
 846             ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) {
 847                 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv,
 848                     ev_port)) != ILB_STATUS_OK) {
 849                         /* Remove all previously added servers */
 850                         ilbd_hc_srv_rem_all(hc_rule);
 851                         list_destroy(&hc_rule->hcr_servers);
 852                         free(hc_rule);
 853                         return (ret);
 854                 }
 855         }
 856         list_insert_tail(&hc->ihc_rules, hc_rule);
 857         hc->ihc_rule_cnt++;
 858 
 859         return (ILB_STATUS_OK);
 860 }
 861 
 862 /* Dissociate a rule from its hc object. */
 863 ilb_status_t
 864 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule)
 865 {
 866         ilbd_hc_t       *hc;
 867         ilbd_hc_rule_t  *hc_rule;
 868 
 869         /* The rule is assumed to be initialized appropriately. */
 870         if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
 871                 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not "
 872                     "exist", rule->irl_hcname);
 873                 return (ILB_STATUS_ENOENT);
 874         }
 875         for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
 876             hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
 877                 if (hc_rule->hcr_rule == rule)
 878                         break;
 879         }
 880         if (hc_rule == NULL) {
 881                 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated "
 882                     "with healtcheck %s", rule->irl_hcname, hc->ihc_name);
 883                 return (ILB_STATUS_ENOENT);
 884         }
 885         ilbd_hc_srv_rem_all(hc_rule);
 886         list_remove(&hc->ihc_rules, hc_rule);
 887         hc->ihc_rule_cnt--;
 888         list_destroy(&hc_rule->hcr_servers);
 889         free(hc_rule);
 890         return (ILB_STATUS_OK);
 891 }
 892 
 893 /*
 894  * Given a hc object name and a rule, check to see if the rule is associated
 895  * with the hc object.  If it is, the hc object is returned in **hc and the
 896  * ilbd_hc_rule_t is returned in **hc_rule.
 897  */
 898 static boolean_t
 899 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule,
 900     ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule)
 901 {
 902         ilbd_hc_t       *tmp_hc;
 903         ilbd_hc_rule_t  *tmp_hc_rule;
 904 
 905         if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL)
 906                 return (B_FALSE);
 907         for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL;
 908             tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) {
 909                 if (tmp_hc_rule->hcr_rule == rule) {
 910                         *hc = tmp_hc;
 911                         *hc_rule = tmp_hc_rule;
 912                         return (B_TRUE);
 913                 }
 914         }
 915         return (B_FALSE);
 916 }
 917 
 918 /* Associate a server with its hc object. */
 919 ilb_status_t
 920 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
 921     int ev_port)
 922 {
 923         ilbd_hc_t       *hc;
 924         ilbd_hc_rule_t  *hc_rule;
 925 
 926         if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
 927                 return (ILB_STATUS_ENOENT);
 928         return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port));
 929 }
 930 
 931 /* Dissociate a server from its hc object. */
 932 ilb_status_t
 933 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
 934 {
 935         ilbd_hc_t       *hc;
 936         ilbd_hc_rule_t  *hc_rule;
 937 
 938         if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
 939                 return (ILB_STATUS_ENOENT);
 940         return (ilbd_hc_srv_rem(hc_rule, srv));
 941 }
 942 
 943 /* Helper routine to enable/disable a server's hc probe. */
 944 static ilb_status_t
 945 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
 946     boolean_t enable)
 947 {
 948         ilbd_hc_t       *hc;
 949         ilbd_hc_rule_t  *hc_rule;
 950         ilbd_hc_srv_t   *tmp_srv;
 951         ilb_status_t    ret;
 952 
 953         if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
 954                 return (ILB_STATUS_ENOENT);
 955         for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
 956             tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
 957                 if (tmp_srv->shc_sg_srv != srv) {
 958                         continue;
 959                 }
 960                 if (enable) {
 961                         if (tmp_srv->shc_status == ILB_HCS_DISABLED) {
 962                                 ret = ilbd_hc_restart_timer(hc, tmp_srv);
 963                                 if (ret != ILB_STATUS_OK) {
 964                                         logerr("%s: cannot start timers for "
 965                                             "rule %s server %s", __func__,
 966                                             rule->irl_name,
 967                                             tmp_srv->shc_sg_srv->sgs_srvID);
 968                                         return (ret);
 969                                 }
 970                                 /* Start from fresh... */
 971                                 tmp_srv->shc_status = ILB_HCS_UNINIT;
 972                                 tmp_srv->shc_rtt = 0;
 973                                 tmp_srv->shc_fail_cnt = 0;
 974                         }
 975                 } else {
 976                         if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
 977                                 tmp_srv->shc_status = ILB_HCS_DISABLED;
 978                                 HC_CANCEL_TIMER(tmp_srv);
 979                                 if (tmp_srv->shc_child_pid != 0)
 980                                         ilbd_hc_kill_probe(tmp_srv);
 981                         }
 982                 }
 983                 return (ILB_STATUS_OK);
 984         }
 985         return (ILB_STATUS_ENOENT);
 986 }
 987 
 988 ilb_status_t
 989 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
 990 {
 991         return (ilbd_hc_toggle_server(rule, srv, B_TRUE));
 992 }
 993 
 994 ilb_status_t
 995 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
 996 {
 997         return (ilbd_hc_toggle_server(rule, srv, B_FALSE));
 998 }
 999 
/*
 * Helper routine to enable/disable a rule's hc probe (including all its
 * servers).
 *
 * Enable: every server whose hc status is ILB_HCS_DISABLED and which is
 * enabled in the rule (ILB_IS_SRV_ENABLED on its sgs_flags) gets its timer
 * restarted and its status, rtt and failure count reset.  If restarting a
 * timer fails, the servers that precede the failing one in the list are
 * walked in reverse and disabled again, and the failing status is returned.
 *
 * Disable: every server that is not already ILB_HCS_DISABLED has its timer
 * cancelled, is marked disabled, and has its probe killed if shc_child_pid
 * is non-zero.
 *
 * Returns ILB_STATUS_ENOENT if the rule is not associated with its hc
 * object, ILB_STATUS_OK when all servers were processed, or the status
 * returned by the failing ilbd_hc_restart_timer() call.
 */
static ilb_status_t
ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable)
{
        ilbd_hc_t       *hc;
        ilbd_hc_rule_t  *hc_rule;
        ilbd_hc_srv_t   *tmp_srv;
        int             ret;

        if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
                return (ILB_STATUS_ENOENT);

        for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
            tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
                if (enable) {
                        /*
                         * If the server is disabled in the rule, do not
                         * restart its timer.
                         */
                        if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
                            ILB_IS_SRV_ENABLED(
                            tmp_srv->shc_sg_srv->sgs_flags)) {
                                ret = ilbd_hc_restart_timer(hc, tmp_srv);
                                if (ret != ILB_STATUS_OK) {
                                        logerr("%s: cannot start timers for "
                                            "rule %s server %s", __func__,
                                            rule->irl_name,
                                            tmp_srv->shc_sg_srv->sgs_srvID);
                                        /*
                                         * tmp_srv is left pointing at the
                                         * failing server; the rollback
                                         * starts from its predecessor.
                                         */
                                        goto rollback;
                                } else {
                                        /* Start from fresh... */
                                        tmp_srv->shc_status = ILB_HCS_UNINIT;
                                        tmp_srv->shc_rtt = 0;
                                        tmp_srv->shc_fail_cnt = 0;
                                }
                        }
                } else {
                        if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
                                HC_CANCEL_TIMER(tmp_srv);
                                tmp_srv->shc_status = ILB_HCS_DISABLED;
                                if (tmp_srv->shc_child_pid != 0)
                                        ilbd_hc_kill_probe(tmp_srv);
                        }
                }
        }
        return (ILB_STATUS_OK);
rollback:
        /*
         * Undo by applying the opposite operation to the servers already
         * visited.  The only goto above sits in the enable branch, so
         * `enable' is B_FALSE from here on and the loop below always takes
         * its disable branch.
         *
         * NOTE(review): this disables every preceding server that is not
         * ILB_HCS_DISABLED, not only the ones this call has just enabled --
         * confirm that callers only enable rules whose servers all start
         * out disabled.
         */
        enable = !enable;
        for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv);
            tmp_srv != NULL;
            tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) {
                if (enable) {
                        if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
                            ILB_IS_SRV_ENABLED(
                            tmp_srv->shc_sg_srv->sgs_flags)) {
                                (void) ilbd_hc_restart_timer(hc, tmp_srv);
                                tmp_srv->shc_status = ILB_HCS_UNINIT;
                                tmp_srv->shc_rtt = 0;
                                tmp_srv->shc_fail_cnt = 0;
                        }
                } else {
                        if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
                                HC_CANCEL_TIMER(tmp_srv);
                                tmp_srv->shc_status = ILB_HCS_DISABLED;
                                if (tmp_srv->shc_child_pid != 0)
                                        ilbd_hc_kill_probe(tmp_srv);
                        }
                }
        }
        /* Report the status that triggered the rollback. */
        return (ret);
}
1074 
1075 ilb_status_t
1076 ilbd_hc_enable_rule(const ilbd_rule_t *rule)
1077 {
1078         return (ilbd_hc_toggle_rule(rule, B_TRUE));
1079 }
1080 
1081 ilb_status_t
1082 ilbd_hc_disable_rule(const ilbd_rule_t *rule)
1083 {
1084         return (ilbd_hc_toggle_rule(rule, B_FALSE));
1085 }
1086 
1087 static const char *
1088 topo_2_str(ilb_topo_t topo)
1089 {
1090         switch (topo) {
1091         case ILB_TOPO_DSR:
1092                 return ("DSR");
1093         case ILB_TOPO_NAT:
1094                 return ("NAT");
1095         case ILB_TOPO_HALF_NAT:
1096                 return ("HALF_NAT");
1097         default:
1098                 /* Should not happen. */
1099                 logerr("%s: unknown topology", __func__);
1100                 break;
1101         }
1102         return ("");
1103 }
1104 
1105 /*
1106  * Create the argument list to be passed to a hc probe command.
1107  * The passed in argv is assumed to have HC_PROBE_ARGC elements.
1108  */
1109 static boolean_t
1110 create_argv(ilbd_hc_srv_t *srv, char *argv[])
1111 {
1112         char buf[INET6_ADDRSTRLEN];
1113         ilbd_rule_t const *rule;
1114         ilb_sg_srv_t const *sg_srv;
1115         struct in_addr v4_addr;
1116         in_port_t port;
1117         int i;
1118 
1119         rule = srv->shc_hc_rule->hcr_rule;
1120         sg_srv = srv->shc_sg_srv;
1121 
1122         if (srv->shc_state == ilbd_hc_def_pinging) {
1123                 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
1124                         return (B_FALSE);
1125         } else {
1126                 switch (srv->shc_hc->ihc_test_type) {
1127                 case ILBD_HC_USER:
1128                         if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL)
1129                                 return (B_FALSE);
1130                         break;
1131                 case ILBD_HC_TCP:
1132                 case ILBD_HC_UDP:
1133                         if ((argv[0] = strdup(ILB_PROBE_PROTO)) ==
1134                             NULL) {
1135                                 return (B_FALSE);
1136                         }
1137                         break;
1138                 case ILBD_HC_PING:
1139                         if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) {
1140                                 return (B_FALSE);
1141                         }
1142                         break;
1143                 }
1144         }
1145 
1146         /*
1147          * argv[1] is the VIP.
1148          *
1149          * Right now, the VIP and the backend server addresses should be
1150          * in the same IP address family.  Here we don't do that in case
1151          * this assumption is changed in future.
1152          */
1153         if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) {
1154                 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr);
1155                 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1156                         goto cleanup;
1157         } else {
1158                 if (inet_ntop(AF_INET6, &rule->irl_vip, buf,
1159                     sizeof (buf)) == NULL) {
1160                         goto cleanup;
1161                 }
1162         }
1163         if ((argv[1] = strdup(buf)) == NULL)
1164                 goto cleanup;
1165 
1166         /*
1167          * argv[2] is the backend server address.
1168          */
1169         if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) {
1170                 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr);
1171                 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1172                         goto cleanup;
1173         } else {
1174                 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf,
1175                     sizeof (buf)) == NULL) {
1176                         goto cleanup;
1177                 }
1178         }
1179         if ((argv[2] = strdup(buf)) == NULL)
1180                 goto cleanup;
1181 
1182         /*
1183          * argv[3] is the transport protocol used in the rule.
1184          */
1185         switch (rule->irl_proto) {
1186         case IPPROTO_TCP:
1187                 argv[3] = strdup("TCP");
1188                 break;
1189         case IPPROTO_UDP:
1190                 argv[3] = strdup("UDP");
1191                 break;
1192         default:
1193                 logerr("%s: unknown protocol", __func__);
1194                 goto cleanup;
1195         }
1196         if (argv[3] == NULL)
1197                 goto cleanup;
1198 
1199         /*
1200          * argv[4] is the load balance mode, DSR, NAT, HALF-NAT.
1201          */
1202         if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL)
1203                 goto cleanup;
1204 
1205         /*
1206          * argv[5] is the port range.  Right now, there should only be 1 port.
1207          */
1208         switch (rule->irl_hcpflag) {
1209         case ILB_HCI_PROBE_FIX:
1210                 port = ntohs(rule->irl_hcport);
1211                 break;
1212         case ILB_HCI_PROBE_ANY: {
1213                 in_port_t min, max;
1214 
1215                 if (ntohs(sg_srv->sgs_minport) == 0) {
1216                         min = ntohs(rule->irl_minport);
1217                         max = ntohs(rule->irl_maxport);
1218                 } else {
1219                         min = ntohs(sg_srv->sgs_minport);
1220                         max = ntohs(sg_srv->sgs_maxport);
1221                 }
1222                 if (max > min)
1223                         port = min + gethrtime() % (max - min + 1);
1224                 else
1225                         port = min;
1226                 break;
1227         }
1228         default:
1229                 logerr("%s: unknown HC flag", __func__);
1230                 goto cleanup;
1231         }
1232         (void) sprintf(buf, "%d", port);
1233         if ((argv[5] = strdup(buf)) == NULL)
1234                 goto cleanup;
1235 
1236         /*
1237          * argv[6] is the probe timeout.
1238          */
1239         (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout);
1240         if ((argv[6] = strdup(buf)) == NULL)
1241                 goto cleanup;
1242 
1243         argv[7] = NULL;
1244         return (B_TRUE);
1245 
1246 cleanup:
1247         for (i = 0; i < HC_PROBE_ARGC; i++) {
1248                 if (argv[i] != NULL)
1249                         free(argv[i]);
1250         }
1251         return (B_FALSE);
1252 }
1253 
/*
 * Free the strings of a NULL terminated argument vector.
 *
 * Each freed slot is reset to NULL, so the vector does not keep dangling
 * pointers and calling destroy_argv() on it again is harmless.  The vector
 * itself is not freed.
 */
static void
destroy_argv(char *argv[])
{
        int i;

        for (i = 0; argv[i] != NULL; i++) {
                free(argv[i]);
                argv[i] = NULL;
        }
}
1262 
1263 /* Spawn a process to run the hc probe on the given server. */
1264 static boolean_t
1265 ilbd_run_probe(ilbd_hc_srv_t *srv)
1266 {
1267         posix_spawn_file_actions_t      fd_actions;
1268         posix_spawnattr_t               attr;
1269         sigset_t                        child_sigset;
1270         int                             fds[2];
1271         int                             fdflags;
1272         pid_t                           pid;
1273         char                            *child_argv[HC_PROBE_ARGC];
1274         ilbd_hc_probe_event_t           *probe_ev;
1275         char                            *probe_name;
1276 
1277         bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
1278         if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) {
1279                 logdebug("ilbd_run_probe: calloc");
1280                 return (B_FALSE);
1281         }
1282 
1283         /* Set up a pipe to get output from probe command. */
1284         if (pipe(fds) < 0) {
1285                 logdebug("ilbd_run_probe: cannot create pipe");
1286                 free(probe_ev);
1287                 return (B_FALSE);
1288         }
1289         /* Set our side of the pipe to be non-blocking */
1290         if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) {
1291                 logdebug("ilbd_run_probe: fcntl(F_GETFL)");
1292                 goto cleanup_noactions;
1293         }
1294         if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) {
1295                 logdebug("ilbd_run_probe: fcntl(F_SETFL)");
1296                 goto cleanup_noactions;
1297         }
1298 
1299         if (posix_spawn_file_actions_init(&fd_actions) != 0) {
1300                 logdebug("ilbd_run_probe: posix_spawn_file_actions_init");
1301                 goto cleanup_noactions;
1302         }
1303         if (posix_spawnattr_init(&attr) != 0) {
1304                 logdebug("ilbd_run_probe: posix_spawnattr_init");
1305                 goto cleanup_noattr;
1306         }
1307         if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) {
1308                 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1309                 goto cleanup;
1310         }
1311         if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1],
1312             STDOUT_FILENO) != 0) {
1313                 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2");
1314                 goto cleanup;
1315         }
1316         if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) {
1317                 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1318                 goto cleanup;
1319         }
1320 
1321         /* Reset all signal handling of the child to default. */
1322         (void) sigfillset(&child_sigset);
1323         if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) {
1324                 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault");
1325                 goto cleanup;
1326         }
1327         /* Don't want SIGCHLD. */
1328         if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP|
1329             POSIX_SPAWN_SETSIGDEF) != 0) {
1330                 logdebug("ilbd_run_probe: posix_spawnattr_setflags");
1331                 goto cleanup;
1332         }
1333 
1334         if (!create_argv(srv, child_argv)) {
1335                 logdebug("ilbd_run_probe: create_argv");
1336                 goto cleanup;
1337         }
1338 
1339         /*
1340          * If we are doing default pinging or not using a user supplied
1341          * probe, we should execute our standard supplied probe.  The
1342          * supplied probe command handles all types of probes.  And the
1343          * type used depends on argv[0], as filled in by create_argv().
1344          */
1345         if (srv->shc_state == ilbd_hc_def_pinging ||
1346             srv->shc_hc->ihc_test_type != ILBD_HC_USER) {
1347                 probe_name = ILB_PROBE_PROTO;
1348         } else {
1349                 probe_name = srv->shc_hc->ihc_test;
1350         }
1351         if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv,
1352             NULL) != 0) {
1353                 logerr("%s: posix_spawn: %s for server %s: %s", __func__,
1354                     srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID,
1355                     strerror(errno));
1356                 goto cleanup;
1357         }
1358 
1359         (void) posix_spawnattr_destroy(&attr);
1360         (void) posix_spawn_file_actions_destroy(&fd_actions);
1361         (void) close(fds[1]);
1362         srv->shc_child_pid = pid;
1363         srv->shc_child_fd = fds[0];
1364         srv->shc_ev = probe_ev;
1365 
1366         probe_ev->ihp_ev = ILBD_EVENT_PROBE;
1367         probe_ev->ihp_srv = srv;
1368         probe_ev->ihp_pid = pid;
1369         if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0],
1370             POLLRDNORM, probe_ev) != 0) {
1371                 /*
1372                  * Need to kill the child.  It will free the srv->shc_ev,
1373                  * which is probe_ev.  So set probe_ev to NULL.
1374                  */
1375                 ilbd_hc_kill_probe(srv);
1376                 probe_ev = NULL;
1377                 /* posix_spawn attrs & actions already destroyed. */
1378                 goto cleanup_noactions;
1379         }
1380         destroy_argv(child_argv);
1381 
1382         return (B_TRUE);
1383 
1384 cleanup:
1385         (void) posix_spawnattr_destroy(&attr);
1386 cleanup_noattr:
1387         (void) posix_spawn_file_actions_destroy(&fd_actions);
1388 cleanup_noactions:
1389         (void) close(fds[0]);
1390         (void) close(fds[1]);
1391         destroy_argv(child_argv);
1392         if (probe_ev != NULL)
1393                 free(probe_ev);
1394         return (B_FALSE);
1395 }
1396 
1397 /*
1398  * Called by ild_hc_probe_return() to re-associate the fd to a child to
1399  * the event port.
1400  */
1401 static void
1402 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1403 {
1404         if (port_associate(ev_port, PORT_SOURCE_FD, fd,
1405             POLLRDNORM, ev) != 0) {
1406                 /*
1407                  * If we cannot reassociate with the port, the only
1408                  * thing we can do now is to kill the child and
1409                  * do a blocking wait here...
1410                  */
1411                 logdebug("%s: port_associate: %s", __func__, strerror(errno));
1412                 if (kill(ev->ihp_pid, SIGKILL) != 0)
1413                         logerr("%s: kill: %s", __func__, strerror(errno));
1414                 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid)
1415                         logdebug("%s: waitpid: %s", __func__, strerror(errno));
1416                 free(ev);
1417         }
1418 }
1419 
1420 /*
1421  * To handle a child probe process hanging up.
1422  */
1423 static void
1424 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1425 {
1426         ilbd_hc_srv_t *srv;
1427         pid_t ret_pid;
1428         int ret;
1429 
1430         srv = ev->ihp_srv;
1431 
1432         if (!ev->ihp_done) {
1433                 /* ilbd does not care about this process anymore ... */
1434                 ev->ihp_done = B_TRUE;
1435                 srv->shc_ev = NULL;
1436                 srv->shc_child_pid = 0;
1437                 HC_CANCEL_TIMER(srv);
1438                 ilbd_set_fail_state(srv);
1439         }
1440         ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG);
1441         switch (ret_pid) {
1442         case -1:
1443                 logperror("ilbd_hc_child_hup: waitpid");
1444                 /* FALLTHROUGH */
1445         case 0:
1446                 /* The child has not completed the exit. Wait again. */
1447                 reassociate_port(ev_port, fd, ev);
1448                 break;
1449         default:
1450                 /* Right now, we just ignore the exit status. */
1451                 if (WIFEXITED(ret))
1452                         ret = WEXITSTATUS(ret);
1453                 (void) close(fd);
1454                 free(ev);
1455         }
1456 }
1457 
1458 /*
1459  * To read the output of a child probe process.
1460  */
1461 static void
1462 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev)
1463 {
1464         ilbd_hc_srv_t *srv;
1465         char buf[HC_MAX_PROBE_OUTPUT];
1466         int ret;
1467         int64_t rtt;
1468 
1469         srv = ev->ihp_srv;
1470 
1471         bzero(buf, HC_MAX_PROBE_OUTPUT);
1472         ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1);
1473         /* Should not happen since event port should have caught this. */
1474         assert(ret > 0);
1475 
1476         /*
1477          * We expect the probe command to print out the RTT only.  But
1478          * the command may misbehave and print out more than what we intend to
1479          * read in.  So need to do this check below to "flush" out all the
1480          * output from the command.
1481          */
1482         if (!ev->ihp_done) {
1483                 ev->ihp_done = B_TRUE;
1484                 /* We don't need to know about this event anymore. */
1485                 srv->shc_ev = NULL;
1486                 srv->shc_child_pid = 0;
1487                 HC_CANCEL_TIMER(srv);
1488         } else {
1489                 return;
1490         }
1491 
1492         rtt = strtoll(buf, NULL, 10);
1493 
1494         /*
1495          * -1 means the server is dead or the probe somehow fails.  Treat
1496          * them both as server is dead.
1497          */
1498         if (rtt == -1) {
1499                 ilbd_set_fail_state(srv);
1500                 return;
1501         } else if (rtt > 0) {
1502                 /* If the returned RTT value is not valid, just ignore it. */
1503                 if (rtt > 0 && rtt <= UINT_MAX) {
1504                         /* Set rtt to be the simple smoothed average. */
1505                         if (srv->shc_rtt == 0) {
1506                                 srv->shc_rtt = rtt;
1507                         } else {
1508                                 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) +
1509                                     (rtt >> 2);
1510                         }
1511                 }
1512 
1513         }
1514 
1515         switch (srv->shc_state) {
1516         case ilbd_hc_def_pinging:
1517                 srv->shc_state = ilbd_hc_probing;
1518 
1519                 /* Ping is OK, now start the probe. */
1520                 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
1521                 break;
1522         case ilbd_hc_probing:
1523                 srv->shc_fail_cnt = 0;
1524 
1525                 /* Server is dead before, re-enable it. */
1526                 if (srv->shc_status == ILB_HCS_UNREACH ||
1527                     srv->shc_status == ILB_HCS_DEAD) {
1528                         /*
1529                          * If enabling the server in kernel fails now,
1530                          * hopefully when the timer fires again later, the
1531                          * enabling can be done.
1532                          */
1533                         if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
1534                             srv->shc_hc_rule->hcr_rule->irl_name,
1535                             stat_declare_srv_alive) != ILB_STATUS_OK) {
1536                                 logerr("%s: cannot enable server in kernel: "
1537                                     " rule %s server %s", __func__,
1538                                     srv->shc_hc_rule->hcr_rule->irl_name,
1539                                     srv->shc_sg_srv->sgs_srvID);
1540                         } else {
1541                                 srv->shc_status = ILB_HCS_ALIVE;
1542                         }
1543                 } else {
1544                         srv->shc_status = ILB_HCS_ALIVE;
1545                 }
1546                 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
1547                         logerr("%s: cannot restart timer: rule %s server %s",
1548                             __func__, srv->shc_hc_rule->hcr_rule->irl_name,
1549                             srv->shc_sg_srv->sgs_srvID);
1550                         ilbd_mark_server_disabled(srv);
1551                 }
1552                 break;
1553         default:
1554                 logdebug("%s: unknown state", __func__);
1555                 break;
1556         }
1557 }
1558 
1559 /*
1560  * Handle the return event of a child probe fd.
1561  */
1562 void
1563 ilbd_hc_probe_return(int ev_port, int fd, int port_events,
1564     ilbd_hc_probe_event_t *ev)
1565 {
1566         /*
1567          * Note that there can be more than one events delivered to us at
1568          * the same time.  So we need to check them individually.
1569          */
1570         if (port_events & POLLRDNORM)
1571                 ilbd_hc_child_data(fd, ev);
1572 
1573         if (port_events & (POLLHUP|POLLERR)) {
1574                 ilbd_hc_child_hup(ev_port, fd, ev);
1575                 return;
1576         }
1577 
1578         /*
1579          * Re-associate the fd with the port so that when the child
1580          * exits, we can reap the status.
1581          */
1582         reassociate_port(ev_port, fd, ev);
1583 }