1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2012 Milan Jurik. All rights reserved.
26 * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
27 */
28
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <sys/list.h>
32 #include <sys/stropts.h>
33 #include <sys/siginfo.h>
34 #include <sys/wait.h>
35 #include <arpa/inet.h>
36 #include <netinet/in.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <strings.h>
40 #include <stddef.h>
41 #include <unistd.h>
42 #include <libilb.h>
43 #include <port.h>
44 #include <time.h>
45 #include <signal.h>
46 #include <assert.h>
47 #include <errno.h>
48 #include <spawn.h>
49 #include <fcntl.h>
50 #include <limits.h>
51 #include "libilb_impl.h"
52 #include "ilbd.h"
53
/* Global list of HC objects; entries are linked through ihc_link. */
list_t ilbd_hc_list;

/* Timer queue for all hc related timers. */
static iu_tq_t *ilbd_hc_timer_q;

/*
 * Indicate whether the timer needs to be updated.  It is set whenever a
 * timer on the queue is scheduled, cancelled or expired, and it is cleared
 * by ilbd_hc_timer_update() once the timer has been re-armed.
 */
static boolean_t hc_timer_restarted;

/* Forward declarations of routines used before their definitions. */
static void ilbd_hc_probe_timer(iu_tq_t *, void *);
static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *);
static boolean_t ilbd_run_probe(ilbd_hc_srv_t *);

/*
 * Larger of two values.  Note that the selected argument is evaluated
 * twice, so arguments with side effects should be avoided.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * Number of arguments passed to a probe.  argv[0] is the path name of
 * the probe.
 */
#define HC_PROBE_ARGC 8

/*
 * Max number of characters to be read from the output of a probe.  It
 * is long enough to read in a 64 bit integer.
 */
#define HC_MAX_PROBE_OUTPUT 24
81 void
82 i_ilbd_setup_hc_list(void)
83 {
84 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t),
85 offsetof(ilbd_hc_t, ihc_link));
86 }
87
88 /*
89 * Given a hc object name, return a pointer to hc object if found.
90 */
91 ilbd_hc_t *
92 ilbd_get_hc(const char *name)
93 {
94 ilbd_hc_t *hc;
95
96 for (hc = list_head(&ilbd_hc_list); hc != NULL;
97 hc = list_next(&ilbd_hc_list, hc)) {
98 if (strcasecmp(hc->ihc_name, name) == 0)
99 return (hc);
100 }
101 return (NULL);
102 }
103
104 /*
105 * Generates an audit record for create-healthcheck,
106 * delete-healtcheck subcommands.
107 */
108 static void
109 ilbd_audit_hc_event(const char *audit_hcname,
110 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd,
111 ilb_status_t rc, ucred_t *ucredp)
112 {
113 adt_session_data_t *ah;
114 adt_event_data_t *event;
115 au_event_t flag;
116 int audit_error;
117
118 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) {
119 /*
120 * we came here from the path where ilbd incorporates
121 * the configuration that is listed in SCF:
122 * i_ilbd_read_config->ilbd_walk_hc_pgs->
123 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc
124 * We skip auditing in that case
125 */
126 logdebug("ilbd_audit_hc_event: skipping auditing");
127 return;
128 }
129
130 if (adt_start_session(&ah, NULL, 0) != 0) {
131 logerr("ilbd_audit_hc_event: adt_start_session failed");
132 exit(EXIT_FAILURE);
133 }
134 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
135 (void) adt_end_session(ah);
136 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed");
137 exit(EXIT_FAILURE);
138 }
139 if (cmd == ILBD_CREATE_HC)
140 flag = ADT_ilb_create_healthcheck;
141 else if (cmd == ILBD_DESTROY_HC)
142 flag = ADT_ilb_delete_healthcheck;
143
144 if ((event = adt_alloc_event(ah, flag)) == NULL) {
145 logerr("ilbd_audit_hc_event: adt_alloc_event failed");
146 exit(EXIT_FAILURE);
147 }
148 (void) memset((char *)event, 0, sizeof (adt_event_data_t));
149
150 switch (cmd) {
151 case ILBD_CREATE_HC:
152 event->adt_ilb_create_healthcheck.auth_used =
153 NET_ILB_CONFIG_AUTH;
154 event->adt_ilb_create_healthcheck.hc_test =
155 (char *)audit_hcinfo->hci_test;
156 event->adt_ilb_create_healthcheck.hc_name =
157 (char *)audit_hcinfo->hci_name;
158
159 /*
160 * If the value 0 is stored, the default values are
161 * set in the kernel. User land does not know about them
162 * So if the user does not specify them, audit record
163 * will show them as 0
164 */
165 event->adt_ilb_create_healthcheck.hc_timeout =
166 audit_hcinfo->hci_timeout;
167 event->adt_ilb_create_healthcheck.hc_count =
168 audit_hcinfo->hci_count;
169 event->adt_ilb_create_healthcheck.hc_interval =
170 audit_hcinfo->hci_interval;
171 break;
172 case ILBD_DESTROY_HC:
173 event->adt_ilb_delete_healthcheck.auth_used =
174 NET_ILB_CONFIG_AUTH;
175 event->adt_ilb_delete_healthcheck.hc_name =
176 (char *)audit_hcname;
177 break;
178 }
179
180 /* Fill in success/failure */
181 if (rc == ILB_STATUS_OK) {
182 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
183 logerr("ilbd_audit_hc_event: adt_put_event failed");
184 exit(EXIT_FAILURE);
185 }
186 } else {
187 audit_error = ilberror2auditerror(rc);
188 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
189 logerr("ilbd_audit_hc_event: adt_put_event failed");
190 exit(EXIT_FAILURE);
191 }
192 }
193 adt_free_event(event);
194 (void) adt_end_session(ah);
195 }
196
197 /*
198 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object
199 * in ilbd. The parameter ev_port is not used, refer to comments of
200 * ilbd_create_sg() in ilbd_sg.c
201 */
202 /* ARGSUSED */
203 ilb_status_t
204 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port,
205 const struct passwd *ps, ucred_t *ucredp)
206 {
207 ilbd_hc_t *hc;
208 ilb_status_t ret = ILB_STATUS_OK;
209
210 /*
211 * ps == NULL is from the daemon when it starts and load configuration
212 * ps != NULL is from client.
213 */
214 if (ps != NULL) {
215 ret = ilbd_check_client_config_auth(ps);
216 if (ret != ILB_STATUS_OK) {
217 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
218 ret, ucredp);
219 return (ret);
220 }
221 }
222
223 if (hc_info->hci_name[0] == '\0') {
224 logdebug("ilbd_create_hc: missing healthcheck info");
225 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
226 ILB_STATUS_ENOHCINFO, ucredp);
227 return (ILB_STATUS_ENOHCINFO);
228 }
229
230 hc = ilbd_get_hc(hc_info->hci_name);
231 if (hc != NULL) {
232 logdebug("ilbd_create_hc: healthcheck name %s already"
233 " exists", hc_info->hci_name);
234 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
235 ILB_STATUS_EEXIST, ucredp);
236 return (ILB_STATUS_EEXIST);
237 }
238
239 /*
240 * Sanity check on user supplied probe. The given path name
241 * must be a full path name (starts with '/') and is
242 * executable.
243 */
244 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 &&
245 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 &&
246 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 &&
247 (hc_info->hci_test[0] != '/' ||
248 access(hc_info->hci_test, X_OK) == -1)) {
249 if (errno == ENOENT) {
250 logdebug("ilbd_create_hc: user script %s doesn't "
251 "exist", hc_info->hci_test);
252 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
253 ILB_STATUS_ENOENT, ucredp);
254 return (ILB_STATUS_ENOENT);
255 } else {
256 logdebug("ilbd_create_hc: user script %s is "
257 "invalid", hc_info->hci_test);
258 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
259 ILB_STATUS_EINVAL, ucredp);
260 return (ILB_STATUS_EINVAL);
261 }
262 }
263
264 /* Create and add the hc object */
265 hc = calloc(1, sizeof (ilbd_hc_t));
266 if (hc == NULL) {
267 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
268 ILB_STATUS_ENOMEM, ucredp);
269 return (ILB_STATUS_ENOMEM);
270 }
271 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t));
272 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0)
273 hc->ihc_test_type = ILBD_HC_TCP;
274 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0)
275 hc->ihc_test_type = ILBD_HC_UDP;
276 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0)
277 hc->ihc_test_type = ILBD_HC_PING;
278 else
279 hc->ihc_test_type = ILBD_HC_USER;
280 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t),
281 offsetof(ilbd_hc_rule_t, hcr_link));
282
283 /* Update SCF */
284 if (ps != NULL) {
285 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) !=
286 ILB_STATUS_OK) {
287 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
288 ret, ucredp);
289 list_destroy(&hc->ihc_rules);
290 free(hc);
291 return (ret);
292 }
293 }
294
295 /* Everything is fine, now add it to the global list. */
296 list_insert_tail(&ilbd_hc_list, hc);
297 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp);
298 return (ret);
299 }
300
301 /*
302 * Given a name of a hc object, destroy it.
303 */
304 ilb_status_t
305 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps,
306 ucred_t *ucredp)
307 {
308 ilb_status_t ret;
309 ilbd_hc_t *hc;
310
311 /*
312 * No need to check ps == NULL, daemon won't call any destroy func
313 * at start up.
314 */
315 ret = ilbd_check_client_config_auth(ps);
316 if (ret != ILB_STATUS_OK) {
317 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
318 ret, ucredp);
319 return (ret);
320 }
321
322 hc = ilbd_get_hc(hc_name);
323 if (hc == NULL) {
324 logdebug("ilbd_destroy_hc: healthcheck %s does not exist",
325 hc_name);
326 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
327 ILB_STATUS_ENOENT, ucredp);
328 return (ILB_STATUS_ENOENT);
329 }
330
331 /* If hc is in use, cannot delete it */
332 if (hc->ihc_rule_cnt > 0) {
333 logdebug("ilbd_destroy_hc: healthcheck %s is associated"
334 " with a rule - cannot remove", hc_name);
335 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
336 ILB_STATUS_INUSE, ucredp);
337 return (ILB_STATUS_INUSE);
338 }
339
340 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) !=
341 ILB_STATUS_OK) {
342 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s "
343 "property group", hc_name);
344 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
345 ret, ucredp);
346 return (ret);
347 }
348
349 list_remove(&ilbd_hc_list, hc);
350 list_destroy(&hc->ihc_rules);
351 free(hc);
352 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp);
353 return (ret);
354 }
355
356 /*
357 * Given a hc object name, return its information. Used by libilb to
358 * get hc info.
359 */
360 ilb_status_t
361 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz)
362 {
363 ilbd_hc_t *hc;
364 ilb_hc_info_t *hc_info;
365 ilb_comm_t *ic = (ilb_comm_t *)rbuf;
366
367 hc = ilbd_get_hc(hc_name);
368 if (hc == NULL) {
369 logdebug("%s: healthcheck %s does not exist", __func__,
370 hc_name);
371 return (ILB_STATUS_ENOENT);
372 }
373 ilbd_reply_ok(rbuf, rbufsz);
374 hc_info = (ilb_hc_info_t *)&ic->ic_data;
375
376 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name));
377 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test));
378 hc_info->hci_timeout = hc->ihc_timeout;
379 hc_info->hci_count = hc->ihc_count;
380 hc_info->hci_interval = hc->ihc_interval;
381 hc_info->hci_def_ping = hc->ihc_def_ping;
382
383 *rbufsz += sizeof (ilb_hc_info_t);
384
385 return (ILB_STATUS_OK);
386 }
387
388 static void
389 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule,
390 const char *rulename)
391 {
392 ilbd_hc_srv_t *tmp_srv;
393 ilb_hc_srv_t *dst_srv;
394 ilb_hc_rule_srv_t *srvs;
395 size_t tmp_rbufsz;
396 int i;
397
398 tmp_rbufsz = *rbufsz;
399 /* Set up the reply buffer. rbufsz will be set to the new size. */
400 ilbd_reply_ok(rbuf, rbufsz);
401
402 /* Calculate how much space is left for holding server info. */
403 *rbufsz += sizeof (ilb_hc_rule_srv_t);
404 tmp_rbufsz -= *rbufsz;
405
406 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
407
408 tmp_srv = list_head(&hc_rule->hcr_servers);
409 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) {
410 dst_srv = &srvs->rs_srvs[i];
411
412 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ);
413 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID,
414 ILB_NAMESZ);
415 (void) strlcpy(dst_srv->hcs_hc_name,
416 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ);
417 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr;
418 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt;
419 dst_srv->hcs_status = tmp_srv->shc_status;
420 dst_srv->hcs_rtt = tmp_srv->shc_rtt;
421 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime;
422 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime;
423
424 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv);
425 tmp_rbufsz -= sizeof (*dst_srv);
426 }
427 srvs->rs_num_srvs = i;
428 *rbufsz += i * sizeof (*dst_srv);
429 }
430
431 /*
432 * Given a rule name, return the hc status of its servers.
433 */
434 ilb_status_t
435 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz)
436 {
437 ilbd_hc_t *hc;
438 ilbd_hc_rule_t *hc_rule;
439
440 for (hc = list_head(&ilbd_hc_list); hc != NULL;
441 hc = list_next(&ilbd_hc_list, hc)) {
442 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
443 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
444 if (strcasecmp(hc_rule->hcr_rule->irl_name,
445 rulename) != 0) {
446 continue;
447 }
448 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename);
449 return (ILB_STATUS_OK);
450 }
451 }
452 return (ILB_STATUS_RULE_NO_HC);
453 }
454
455 /*
456 * Initialize the hc timer and associate the notification of timeout to
457 * the given event port.
458 */
459 void
460 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj)
461 {
462 struct sigevent sigev;
463 port_notify_t notify;
464
465 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) {
466 logerr("%s: cannot create hc timer queue", __func__);
467 exit(EXIT_FAILURE);
468 }
469 hc_timer_restarted = B_FALSE;
470
471 ev_obj->ev = ILBD_EVENT_TIMER;
472 ev_obj->timerid = -1;
473
474 notify.portnfy_port = ev_port;
475 notify.portnfy_user = ev_obj;
476 sigev.sigev_notify = SIGEV_PORT;
477 sigev.sigev_value.sival_ptr = ¬ify;
478 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) {
479 logerr("%s: cannot create timer", __func__);
480 exit(EXIT_FAILURE);
481 }
482 }
483
484 /*
485 * HC timeout handler.
486 */
487 void
488 ilbd_hc_timeout(void)
489 {
490 (void) iu_expire_timers(ilbd_hc_timer_q);
491 hc_timer_restarted = B_TRUE;
492 }
493
494 /*
495 * Set up the timer to fire at the earliest timeout.
496 */
497 void
498 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj)
499 {
500 itimerspec_t itimeout;
501 int timeout;
502
503 /*
504 * There is no change on the timer list, so no need to set up the
505 * timer again.
506 */
507 if (!hc_timer_restarted)
508 return;
509
510 restart:
511 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) {
512 hc_timer_restarted = B_FALSE;
513 return;
514 } else if (timeout == 0) {
515 /*
516 * Handle the timeout immediately. After that (clearing all
517 * the expired timers), check to see if there are still
518 * timers running. If yes, start them.
519 */
520 (void) iu_expire_timers(ilbd_hc_timer_q);
521 goto restart;
522 }
523
524 itimeout.it_value.tv_sec = timeout / MILLISEC + 1;
525 itimeout.it_value.tv_nsec = 0;
526 itimeout.it_interval.tv_sec = 0;
527 itimeout.it_interval.tv_nsec = 0;
528
529 /*
530 * Failure to set a timeout is "OK" since hopefully there will be
531 * other events and timer_settime() will be called again. So
532 * we will only miss some timeouts. But in the worst case, no event
533 * will happen and ilbd will get stuck...
534 */
535 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1)
536 logerr("%s: cannot set timer", __func__);
537 hc_timer_restarted = B_FALSE;
538 }
539
540 /*
541 * Kill the probe process of a server.
542 */
543 static void
544 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv)
545 {
546 /*
547 * First dissociate the fd from the event port. It should not
548 * fail.
549 */
550 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD,
551 srv->shc_child_fd) != 0) {
552 logdebug("%s: port_dissociate: %s", __func__, strerror(errno));
553 }
554 (void) close(srv->shc_child_fd);
555 free(srv->shc_ev);
556 srv->shc_ev = NULL;
557
558 /* Then kill the probe process. */
559 if (kill(srv->shc_child_pid, SIGKILL) != 0) {
560 logerr("%s: rule %s server %s: %s", __func__,
561 srv->shc_hc_rule->hcr_rule->irl_name,
562 srv->shc_sg_srv->sgs_srvID, strerror(errno));
563 }
564 /* Should not fail... */
565 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) {
566 logdebug("%s: waitpid: rule %s server %s", __func__,
567 srv->shc_hc_rule->hcr_rule->irl_name,
568 srv->shc_sg_srv->sgs_srvID);
569 }
570 srv->shc_child_pid = 0;
571 }
572
573 /*
574 * Disable the server, either because the server is dead or because a timer
575 * cannot be started for this server. Note that this only affects the
576 * transient configuration, meaning only in memory. The persistent
577 * configuration is not affected.
578 */
579 static void
580 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv)
581 {
582 srv->shc_status = ILB_HCS_DISABLED;
583
584 /* Disable the server in kernel. */
585 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
586 srv->shc_hc_rule->hcr_rule->irl_name,
587 stat_declare_srv_dead) != ILB_STATUS_OK) {
588 logerr("%s: cannot disable server in kernel: rule %s "
589 "server %s", __func__,
590 srv->shc_hc_rule->hcr_rule->irl_name,
591 srv->shc_sg_srv->sgs_srvID);
592 }
593 }
594
595 /*
596 * A probe fails, set the state of the server.
597 */
598 static void
599 ilbd_set_fail_state(ilbd_hc_srv_t *srv)
600 {
601 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) {
602 /* Probe again */
603 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
604 return;
605 }
606
607 logdebug("%s: rule %s server %s fails %u", __func__,
608 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID,
609 srv->shc_fail_cnt);
610
611 /*
612 * If this is a ping test, mark the server as
613 * unreachable instead of dead.
614 */
615 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING ||
616 srv->shc_state == ilbd_hc_def_pinging) {
617 srv->shc_status = ILB_HCS_UNREACH;
618 } else {
619 srv->shc_status = ILB_HCS_DEAD;
620 }
621
622 /* Disable the server in kernel. */
623 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
624 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) !=
625 ILB_STATUS_OK) {
626 logerr("%s: cannot disable server in kernel: rule %s "
627 "server %s", __func__,
628 srv->shc_hc_rule->hcr_rule->irl_name,
629 srv->shc_sg_srv->sgs_srvID);
630 }
631
632 /* Still keep probing in case the server is alive again. */
633 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
634 /* Only thing to do is to disable the server... */
635 logerr("%s: cannot restart timer: rule %s server %s", __func__,
636 srv->shc_hc_rule->hcr_rule->irl_name,
637 srv->shc_sg_srv->sgs_srvID);
638 srv->shc_status = ILB_HCS_DISABLED;
639 }
640 }
641
642 /*
643 * A probe process has not returned for the ihc_timeout period, we should
644 * kill it. This function is the handler of this.
645 */
646 /* ARGSUSED */
647 static void
648 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg)
649 {
650 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
651
652 ilbd_hc_kill_probe(srv);
653 ilbd_set_fail_state(srv);
654 }
655
656 /*
657 * Probe timeout handler. Send out the appropriate probe.
658 */
659 /* ARGSUSED */
660 static void
661 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg)
662 {
663 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
664
665 /*
666 * If starting the probe fails, just pretend that the timeout has
667 * extended.
668 */
669 if (!ilbd_run_probe(srv)) {
670 /*
671 * If we cannot restart the timer, the only thing we can do
672 * is to disable this server. Hopefully the sys admin will
673 * notice this and enable this server again later.
674 */
675 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
676 logerr("%s: cannot restart timer: rule %s server %s, "
677 "disabling it", __func__,
678 srv->shc_hc_rule->hcr_rule->irl_name,
679 srv->shc_sg_srv->sgs_srvID);
680 ilbd_mark_server_disabled(srv);
681 }
682 return;
683 }
684
685 /*
686 * Similar to above, if kill timer cannot be started, disable the
687 * server.
688 */
689 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q,
690 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) {
691 logerr("%s: cannot start kill timer: rule %s server %s, "
692 "disabling it", __func__,
693 srv->shc_hc_rule->hcr_rule->irl_name,
694 srv->shc_sg_srv->sgs_srvID);
695 ilbd_mark_server_disabled(srv);
696 }
697 hc_timer_restarted = B_TRUE;
698 }
699
700 /* Restart the periodic timer for a given server. */
701 static ilb_status_t
702 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv)
703 {
704 int timeout;
705
706 /* Don't allow the timeout interval to be less than 1s */
707 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() %
708 (hc->ihc_interval + 1)), 1);
709
710 /*
711 * If the probe is actually a ping probe, there is no need to
712 * do default pinging. Just skip the step.
713 */
714 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING)
715 srv->shc_state = ilbd_hc_def_pinging;
716 else
717 srv->shc_state = ilbd_hc_probing;
718 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout,
719 ilbd_hc_probe_timer, srv);
720
721 if (srv->shc_tid == -1)
722 return (ILB_STATUS_TIMER);
723 srv->shc_lasttime = time(NULL);
724 srv->shc_nexttime = time(NULL) + timeout;
725
726 hc_timer_restarted = B_TRUE;
727 return (ILB_STATUS_OK);
728 }
729
730 /* Helper routine to associate a server with its hc object. */
731 static ilb_status_t
732 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule,
733 const ilb_sg_srv_t *srv, int ev_port)
734 {
735 ilbd_hc_srv_t *new_srv;
736 ilb_status_t ret;
737
738 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL)
739 return (ILB_STATUS_ENOMEM);
740 new_srv->shc_hc = hc;
741 new_srv->shc_hc_rule = hc_rule;
742 new_srv->shc_sg_srv = srv;
743 new_srv->shc_ev_port = ev_port;
744 new_srv->shc_tid = -1;
745 new_srv->shc_nexttime = time(NULL);
746 new_srv->shc_lasttime = new_srv->shc_nexttime;
747
748 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) &&
749 ILB_IS_SRV_ENABLED(srv->sgs_flags)) {
750 new_srv->shc_status = ILB_HCS_UNINIT;
751 ret = ilbd_hc_restart_timer(hc, new_srv);
752 if (ret != ILB_STATUS_OK) {
753 free(new_srv);
754 return (ret);
755 }
756 } else {
757 new_srv->shc_status = ILB_HCS_DISABLED;
758 }
759
760 list_insert_tail(&hc_rule->hcr_servers, new_srv);
761 return (ILB_STATUS_OK);
762 }
763
/*
 * Handy macro to cancel a server's timer.
 *
 * If the server has a timer scheduled (shc_tid != -1), it is removed from
 * the hc timer queue and shc_tid is reset to -1.  In all cases the timer
 * is flagged as needing an update.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like
 * a single statement, and its locals carry a prefix so that they do not
 * shadow variables of the calling function.
 */
#define	HC_CANCEL_TIMER(srv)						\
do {									\
	void	*hc_ct_arg;						\
	int	hc_ct_ret;						\
	if ((srv)->shc_tid != -1) {					\
		hc_ct_ret = iu_cancel_timer(ilbd_hc_timer_q,		\
		    (srv)->shc_tid, &hc_ct_arg);			\
		(srv)->shc_tid = -1;					\
		assert(hc_ct_ret == 1);					\
		assert(hc_ct_arg == (srv));				\
	}								\
	hc_timer_restarted = B_TRUE;					\
} while (0)
777
778 /* Helper routine to dissociate a server from its hc object. */
779 static ilb_status_t
780 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv)
781 {
782 ilbd_hc_srv_t *tmp_srv;
783
784 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
785 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
786 if (tmp_srv->shc_sg_srv == srv) {
787 list_remove(&hc_rule->hcr_servers, tmp_srv);
788 HC_CANCEL_TIMER(tmp_srv);
789 if (tmp_srv->shc_child_pid != 0)
790 ilbd_hc_kill_probe(tmp_srv);
791 free(tmp_srv);
792 return (ILB_STATUS_OK);
793 }
794 }
795 return (ILB_STATUS_ENOENT);
796 }
797
798 /* Helper routine to dissociate all servers of a rule from its hc object. */
799 static void
800 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule)
801 {
802 ilbd_hc_srv_t *srv;
803
804 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) {
805 HC_CANCEL_TIMER(srv);
806 if (srv->shc_child_pid != 0)
807 ilbd_hc_kill_probe(srv);
808 free(srv);
809 }
810 }
811
812 /* Associate a rule with its hc object. */
813 ilb_status_t
814 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port)
815 {
816 ilbd_hc_t *hc;
817 ilbd_hc_rule_t *hc_rule;
818 ilb_status_t ret;
819 ilbd_sg_t *sg;
820 ilbd_srv_t *ilbd_srv;
821
822 /* The rule is assumed to be initialized appropriately. */
823 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
824 logdebug("ilbd_hc_associate_rule: healthcheck %s does not "
825 "exist", rule->irl_hcname);
826 return (ILB_STATUS_ENOHCINFO);
827 }
828 if ((hc->ihc_test_type == ILBD_HC_TCP &&
829 rule->irl_proto != IPPROTO_TCP) ||
830 (hc->ihc_test_type == ILBD_HC_UDP &&
831 rule->irl_proto != IPPROTO_UDP)) {
832 return (ILB_STATUS_RULE_HC_MISMATCH);
833 }
834 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) {
835 logdebug("ilbd_hc_associate_rule: out of memory");
836 return (ILB_STATUS_ENOMEM);
837 }
838
839 hc_rule->hcr_rule = rule;
840 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t),
841 offsetof(ilbd_hc_srv_t, shc_srv_link));
842
843 /* Add all the servers. */
844 sg = rule->irl_sg;
845 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL;
846 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) {
847 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv,
848 ev_port)) != ILB_STATUS_OK) {
849 /* Remove all previously added servers */
850 ilbd_hc_srv_rem_all(hc_rule);
851 list_destroy(&hc_rule->hcr_servers);
852 free(hc_rule);
853 return (ret);
854 }
855 }
856 list_insert_tail(&hc->ihc_rules, hc_rule);
857 hc->ihc_rule_cnt++;
858
859 return (ILB_STATUS_OK);
860 }
861
862 /* Dissociate a rule from its hc object. */
863 ilb_status_t
864 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule)
865 {
866 ilbd_hc_t *hc;
867 ilbd_hc_rule_t *hc_rule;
868
869 /* The rule is assumed to be initialized appropriately. */
870 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
871 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not "
872 "exist", rule->irl_hcname);
873 return (ILB_STATUS_ENOENT);
874 }
875 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
876 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
877 if (hc_rule->hcr_rule == rule)
878 break;
879 }
880 if (hc_rule == NULL) {
881 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated "
882 "with healtcheck %s", rule->irl_hcname, hc->ihc_name);
883 return (ILB_STATUS_ENOENT);
884 }
885 ilbd_hc_srv_rem_all(hc_rule);
886 list_remove(&hc->ihc_rules, hc_rule);
887 hc->ihc_rule_cnt--;
888 list_destroy(&hc_rule->hcr_servers);
889 free(hc_rule);
890 return (ILB_STATUS_OK);
891 }
892
893 /*
894 * Given a hc object name and a rule, check to see if the rule is associated
895 * with the hc object. If it is, the hc object is returned in **hc and the
896 * ilbd_hc_rule_t is returned in **hc_rule.
897 */
898 static boolean_t
899 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule,
900 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule)
901 {
902 ilbd_hc_t *tmp_hc;
903 ilbd_hc_rule_t *tmp_hc_rule;
904
905 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL)
906 return (B_FALSE);
907 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL;
908 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) {
909 if (tmp_hc_rule->hcr_rule == rule) {
910 *hc = tmp_hc;
911 *hc_rule = tmp_hc_rule;
912 return (B_TRUE);
913 }
914 }
915 return (B_FALSE);
916 }
917
918 /* Associate a server with its hc object. */
919 ilb_status_t
920 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
921 int ev_port)
922 {
923 ilbd_hc_t *hc;
924 ilbd_hc_rule_t *hc_rule;
925
926 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
927 return (ILB_STATUS_ENOENT);
928 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port));
929 }
930
931 /* Dissociate a server from its hc object. */
932 ilb_status_t
933 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
934 {
935 ilbd_hc_t *hc;
936 ilbd_hc_rule_t *hc_rule;
937
938 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
939 return (ILB_STATUS_ENOENT);
940 return (ilbd_hc_srv_rem(hc_rule, srv));
941 }
942
943 /* Helper routine to enable/disable a server's hc probe. */
944 static ilb_status_t
945 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
946 boolean_t enable)
947 {
948 ilbd_hc_t *hc;
949 ilbd_hc_rule_t *hc_rule;
950 ilbd_hc_srv_t *tmp_srv;
951 ilb_status_t ret;
952
953 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
954 return (ILB_STATUS_ENOENT);
955 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
956 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
957 if (tmp_srv->shc_sg_srv != srv) {
958 continue;
959 }
960 if (enable) {
961 if (tmp_srv->shc_status == ILB_HCS_DISABLED) {
962 ret = ilbd_hc_restart_timer(hc, tmp_srv);
963 if (ret != ILB_STATUS_OK) {
964 logerr("%s: cannot start timers for "
965 "rule %s server %s", __func__,
966 rule->irl_name,
967 tmp_srv->shc_sg_srv->sgs_srvID);
968 return (ret);
969 }
970 /* Start from fresh... */
971 tmp_srv->shc_status = ILB_HCS_UNINIT;
972 tmp_srv->shc_rtt = 0;
973 tmp_srv->shc_fail_cnt = 0;
974 }
975 } else {
976 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
977 tmp_srv->shc_status = ILB_HCS_DISABLED;
978 HC_CANCEL_TIMER(tmp_srv);
979 if (tmp_srv->shc_child_pid != 0)
980 ilbd_hc_kill_probe(tmp_srv);
981 }
982 }
983 return (ILB_STATUS_OK);
984 }
985 return (ILB_STATUS_ENOENT);
986 }
987
988 ilb_status_t
989 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
990 {
991 return (ilbd_hc_toggle_server(rule, srv, B_TRUE));
992 }
993
994 ilb_status_t
995 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
996 {
997 return (ilbd_hc_toggle_server(rule, srv, B_FALSE));
998 }
999
1000 /*
1001 * Helper routine to enable/disable a rule's hc probe (including all its
1002 * servers).
1003 */
1004 static ilb_status_t
1005 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable)
1006 {
1007 ilbd_hc_t *hc;
1008 ilbd_hc_rule_t *hc_rule;
1009 ilbd_hc_srv_t *tmp_srv;
1010 int ret;
1011
1012 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
1013 return (ILB_STATUS_ENOENT);
1014
1015 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
1016 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
1017 if (enable) {
1018 /*
1019 * If the server is disabled in the rule, do not
1020 * restart its timer.
1021 */
1022 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1023 ILB_IS_SRV_ENABLED(
1024 tmp_srv->shc_sg_srv->sgs_flags)) {
1025 ret = ilbd_hc_restart_timer(hc, tmp_srv);
1026 if (ret != ILB_STATUS_OK) {
1027 logerr("%s: cannot start timers for "
1028 "rule %s server %s", __func__,
1029 rule->irl_name,
1030 tmp_srv->shc_sg_srv->sgs_srvID);
1031 goto rollback;
1032 } else {
1033 /* Start from fresh... */
1034 tmp_srv->shc_status = ILB_HCS_UNINIT;
1035 tmp_srv->shc_rtt = 0;
1036 tmp_srv->shc_fail_cnt = 0;
1037 }
1038 }
1039 } else {
1040 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1041 HC_CANCEL_TIMER(tmp_srv);
1042 tmp_srv->shc_status = ILB_HCS_DISABLED;
1043 if (tmp_srv->shc_child_pid != 0)
1044 ilbd_hc_kill_probe(tmp_srv);
1045 }
1046 }
1047 }
1048 return (ILB_STATUS_OK);
1049 rollback:
1050 enable = !enable;
1051 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv);
1052 tmp_srv != NULL;
1053 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) {
1054 if (enable) {
1055 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1056 ILB_IS_SRV_ENABLED(
1057 tmp_srv->shc_sg_srv->sgs_flags)) {
1058 (void) ilbd_hc_restart_timer(hc, tmp_srv);
1059 tmp_srv->shc_status = ILB_HCS_UNINIT;
1060 tmp_srv->shc_rtt = 0;
1061 tmp_srv->shc_fail_cnt = 0;
1062 }
1063 } else {
1064 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1065 HC_CANCEL_TIMER(tmp_srv);
1066 tmp_srv->shc_status = ILB_HCS_DISABLED;
1067 if (tmp_srv->shc_child_pid != 0)
1068 ilbd_hc_kill_probe(tmp_srv);
1069 }
1070 }
1071 }
1072 return (ret);
1073 }
1074
1075 ilb_status_t
1076 ilbd_hc_enable_rule(const ilbd_rule_t *rule)
1077 {
1078 return (ilbd_hc_toggle_rule(rule, B_TRUE));
1079 }
1080
1081 ilb_status_t
1082 ilbd_hc_disable_rule(const ilbd_rule_t *rule)
1083 {
1084 return (ilbd_hc_toggle_rule(rule, B_FALSE));
1085 }
1086
1087 static const char *
1088 topo_2_str(ilb_topo_t topo)
1089 {
1090 switch (topo) {
1091 case ILB_TOPO_DSR:
1092 return ("DSR");
1093 case ILB_TOPO_NAT:
1094 return ("NAT");
1095 case ILB_TOPO_HALF_NAT:
1096 return ("HALF_NAT");
1097 default:
1098 /* Should not happen. */
1099 logerr("%s: unknown topology", __func__);
1100 break;
1101 }
1102 return ("");
1103 }
1104
1105 /*
1106 * Create the argument list to be passed to a hc probe command.
1107 * The passed in argv is assumed to have HC_PROBE_ARGC elements.
1108 */
1109 static boolean_t
1110 create_argv(ilbd_hc_srv_t *srv, char *argv[])
1111 {
1112 char buf[INET6_ADDRSTRLEN];
1113 ilbd_rule_t const *rule;
1114 ilb_sg_srv_t const *sg_srv;
1115 struct in_addr v4_addr;
1116 in_port_t port;
1117 int i;
1118
1119 rule = srv->shc_hc_rule->hcr_rule;
1120 sg_srv = srv->shc_sg_srv;
1121
1122 if (srv->shc_state == ilbd_hc_def_pinging) {
1123 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
1124 return (B_FALSE);
1125 } else {
1126 switch (srv->shc_hc->ihc_test_type) {
1127 case ILBD_HC_USER:
1128 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL)
1129 return (B_FALSE);
1130 break;
1131 case ILBD_HC_TCP:
1132 case ILBD_HC_UDP:
1133 if ((argv[0] = strdup(ILB_PROBE_PROTO)) ==
1134 NULL) {
1135 return (B_FALSE);
1136 }
1137 break;
1138 case ILBD_HC_PING:
1139 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) {
1140 return (B_FALSE);
1141 }
1142 break;
1143 }
1144 }
1145
1146 /*
1147 * argv[1] is the VIP.
1148 *
1149 * Right now, the VIP and the backend server addresses should be
1150 * in the same IP address family. Here we don't do that in case
1151 * this assumption is changed in future.
1152 */
1153 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) {
1154 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr);
1155 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1156 goto cleanup;
1157 } else {
1158 if (inet_ntop(AF_INET6, &rule->irl_vip, buf,
1159 sizeof (buf)) == NULL) {
1160 goto cleanup;
1161 }
1162 }
1163 if ((argv[1] = strdup(buf)) == NULL)
1164 goto cleanup;
1165
1166 /*
1167 * argv[2] is the backend server address.
1168 */
1169 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) {
1170 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr);
1171 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1172 goto cleanup;
1173 } else {
1174 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf,
1175 sizeof (buf)) == NULL) {
1176 goto cleanup;
1177 }
1178 }
1179 if ((argv[2] = strdup(buf)) == NULL)
1180 goto cleanup;
1181
1182 /*
1183 * argv[3] is the transport protocol used in the rule.
1184 */
1185 switch (rule->irl_proto) {
1186 case IPPROTO_TCP:
1187 argv[3] = strdup("TCP");
1188 break;
1189 case IPPROTO_UDP:
1190 argv[3] = strdup("UDP");
1191 break;
1192 default:
1193 logerr("%s: unknown protocol", __func__);
1194 goto cleanup;
1195 }
1196 if (argv[3] == NULL)
1197 goto cleanup;
1198
1199 /*
1200 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT.
1201 */
1202 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL)
1203 goto cleanup;
1204
1205 /*
1206 * argv[5] is the port range. Right now, there should only be 1 port.
1207 */
1208 switch (rule->irl_hcpflag) {
1209 case ILB_HCI_PROBE_FIX:
1210 port = ntohs(rule->irl_hcport);
1211 break;
1212 case ILB_HCI_PROBE_ANY: {
1213 in_port_t min, max;
1214
1215 if (ntohs(sg_srv->sgs_minport) == 0) {
1216 min = ntohs(rule->irl_minport);
1217 max = ntohs(rule->irl_maxport);
1218 } else {
1219 min = ntohs(sg_srv->sgs_minport);
1220 max = ntohs(sg_srv->sgs_maxport);
1221 }
1222 if (max > min)
1223 port = min + gethrtime() % (max - min + 1);
1224 else
1225 port = min;
1226 break;
1227 }
1228 default:
1229 logerr("%s: unknown HC flag", __func__);
1230 goto cleanup;
1231 }
1232 (void) sprintf(buf, "%d", port);
1233 if ((argv[5] = strdup(buf)) == NULL)
1234 goto cleanup;
1235
1236 /*
1237 * argv[6] is the probe timeout.
1238 */
1239 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout);
1240 if ((argv[6] = strdup(buf)) == NULL)
1241 goto cleanup;
1242
1243 argv[7] = NULL;
1244 return (B_TRUE);
1245
1246 cleanup:
1247 for (i = 0; i < HC_PROBE_ARGC; i++) {
1248 if (argv[i] != NULL)
1249 free(argv[i]);
1250 }
1251 return (B_FALSE);
1252 }
1253
/*
 * Free every string of a NULL terminated argument vector.  The vector
 * itself is not freed.  Each freed slot is reset to NULL so that the caller
 * is not left holding dangling pointers and a repeated call is harmless.
 */
static void
destroy_argv(char *argv[])
{
	int i;

	for (i = 0; argv[i] != NULL; i++) {
		free(argv[i]);
		argv[i] = NULL;
	}
}
1262
1263 /* Spawn a process to run the hc probe on the given server. */
1264 static boolean_t
1265 ilbd_run_probe(ilbd_hc_srv_t *srv)
1266 {
1267 posix_spawn_file_actions_t fd_actions;
1268 posix_spawnattr_t attr;
1269 sigset_t child_sigset;
1270 int fds[2];
1271 int fdflags;
1272 pid_t pid;
1273 char *child_argv[HC_PROBE_ARGC];
1274 ilbd_hc_probe_event_t *probe_ev;
1275 char *probe_name;
1276
1277 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
1278 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) {
1279 logdebug("ilbd_run_probe: calloc");
1280 return (B_FALSE);
1281 }
1282
1283 /* Set up a pipe to get output from probe command. */
1284 if (pipe(fds) < 0) {
1285 logdebug("ilbd_run_probe: cannot create pipe");
1286 free(probe_ev);
1287 return (B_FALSE);
1288 }
1289 /* Set our side of the pipe to be non-blocking */
1290 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) {
1291 logdebug("ilbd_run_probe: fcntl(F_GETFL)");
1292 goto cleanup_noactions;
1293 }
1294 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) {
1295 logdebug("ilbd_run_probe: fcntl(F_SETFL)");
1296 goto cleanup_noactions;
1297 }
1298
1299 if (posix_spawn_file_actions_init(&fd_actions) != 0) {
1300 logdebug("ilbd_run_probe: posix_spawn_file_actions_init");
1301 goto cleanup_noactions;
1302 }
1303 if (posix_spawnattr_init(&attr) != 0) {
1304 logdebug("ilbd_run_probe: posix_spawnattr_init");
1305 goto cleanup_noattr;
1306 }
1307 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) {
1308 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1309 goto cleanup;
1310 }
1311 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1],
1312 STDOUT_FILENO) != 0) {
1313 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2");
1314 goto cleanup;
1315 }
1316 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) {
1317 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1318 goto cleanup;
1319 }
1320
1321 /* Reset all signal handling of the child to default. */
1322 (void) sigfillset(&child_sigset);
1323 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) {
1324 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault");
1325 goto cleanup;
1326 }
1327 /* Don't want SIGCHLD. */
1328 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP|
1329 POSIX_SPAWN_SETSIGDEF) != 0) {
1330 logdebug("ilbd_run_probe: posix_spawnattr_setflags");
1331 goto cleanup;
1332 }
1333
1334 if (!create_argv(srv, child_argv)) {
1335 logdebug("ilbd_run_probe: create_argv");
1336 goto cleanup;
1337 }
1338
1339 /*
1340 * If we are doing default pinging or not using a user supplied
1341 * probe, we should execute our standard supplied probe. The
1342 * supplied probe command handles all types of probes. And the
1343 * type used depends on argv[0], as filled in by create_argv().
1344 */
1345 if (srv->shc_state == ilbd_hc_def_pinging ||
1346 srv->shc_hc->ihc_test_type != ILBD_HC_USER) {
1347 probe_name = ILB_PROBE_PROTO;
1348 } else {
1349 probe_name = srv->shc_hc->ihc_test;
1350 }
1351 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv,
1352 NULL) != 0) {
1353 logerr("%s: posix_spawn: %s for server %s: %s", __func__,
1354 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID,
1355 strerror(errno));
1356 goto cleanup;
1357 }
1358
1359 (void) posix_spawnattr_destroy(&attr);
1360 (void) posix_spawn_file_actions_destroy(&fd_actions);
1361 (void) close(fds[1]);
1362 srv->shc_child_pid = pid;
1363 srv->shc_child_fd = fds[0];
1364 srv->shc_ev = probe_ev;
1365
1366 probe_ev->ihp_ev = ILBD_EVENT_PROBE;
1367 probe_ev->ihp_srv = srv;
1368 probe_ev->ihp_pid = pid;
1369 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0],
1370 POLLRDNORM, probe_ev) != 0) {
1371 /*
1372 * Need to kill the child. It will free the srv->shc_ev,
1373 * which is probe_ev. So set probe_ev to NULL.
1374 */
1375 ilbd_hc_kill_probe(srv);
1376 probe_ev = NULL;
1377 /* posix_spawn attrs & actions already destroyed. */
1378 goto cleanup_noactions;
1379 }
1380 destroy_argv(child_argv);
1381
1382 return (B_TRUE);
1383
1384 cleanup:
1385 (void) posix_spawnattr_destroy(&attr);
1386 cleanup_noattr:
1387 (void) posix_spawn_file_actions_destroy(&fd_actions);
1388 cleanup_noactions:
1389 (void) close(fds[0]);
1390 (void) close(fds[1]);
1391 destroy_argv(child_argv);
1392 if (probe_ev != NULL)
1393 free(probe_ev);
1394 return (B_FALSE);
1395 }
1396
1397 /*
1398 * Called by ild_hc_probe_return() to re-associate the fd to a child to
1399 * the event port.
1400 */
1401 static void
1402 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1403 {
1404 if (port_associate(ev_port, PORT_SOURCE_FD, fd,
1405 POLLRDNORM, ev) != 0) {
1406 /*
1407 * If we cannot reassociate with the port, the only
1408 * thing we can do now is to kill the child and
1409 * do a blocking wait here...
1410 */
1411 logdebug("%s: port_associate: %s", __func__, strerror(errno));
1412 if (kill(ev->ihp_pid, SIGKILL) != 0)
1413 logerr("%s: kill: %s", __func__, strerror(errno));
1414 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid)
1415 logdebug("%s: waitpid: %s", __func__, strerror(errno));
1416 free(ev);
1417 }
1418 }
1419
1420 /*
1421 * To handle a child probe process hanging up.
1422 */
1423 static void
1424 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1425 {
1426 ilbd_hc_srv_t *srv;
1427 pid_t ret_pid;
1428 int ret;
1429
1430 srv = ev->ihp_srv;
1431
1432 if (!ev->ihp_done) {
1433 /* ilbd does not care about this process anymore ... */
1434 ev->ihp_done = B_TRUE;
1435 srv->shc_ev = NULL;
1436 srv->shc_child_pid = 0;
1437 HC_CANCEL_TIMER(srv);
1438 ilbd_set_fail_state(srv);
1439 }
1440 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG);
1441 switch (ret_pid) {
1442 case -1:
1443 logperror("ilbd_hc_child_hup: waitpid");
1444 /* FALLTHROUGH */
1445 case 0:
1446 /* The child has not completed the exit. Wait again. */
1447 reassociate_port(ev_port, fd, ev);
1448 break;
1449 default:
1450 /* Right now, we just ignore the exit status. */
1451 if (WIFEXITED(ret))
1452 ret = WEXITSTATUS(ret);
1453 (void) close(fd);
1454 free(ev);
1455 }
1456 }
1457
1458 /*
1459 * To read the output of a child probe process.
1460 */
1461 static void
1462 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev)
1463 {
1464 ilbd_hc_srv_t *srv;
1465 char buf[HC_MAX_PROBE_OUTPUT];
1466 int ret;
1467 int64_t rtt;
1468
1469 srv = ev->ihp_srv;
1470
1471 bzero(buf, HC_MAX_PROBE_OUTPUT);
1472 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1);
1473 /* Should not happen since event port should have caught this. */
1474 assert(ret > 0);
1475
1476 /*
1477 * We expect the probe command to print out the RTT only. But
1478 * the command may misbehave and print out more than what we intend to
1479 * read in. So need to do this check below to "flush" out all the
1480 * output from the command.
1481 */
1482 if (!ev->ihp_done) {
1483 ev->ihp_done = B_TRUE;
1484 /* We don't need to know about this event anymore. */
1485 srv->shc_ev = NULL;
1486 srv->shc_child_pid = 0;
1487 HC_CANCEL_TIMER(srv);
1488 } else {
1489 return;
1490 }
1491
1492 rtt = strtoll(buf, NULL, 10);
1493
1494 /*
1495 * -1 means the server is dead or the probe somehow fails. Treat
1496 * them both as server is dead.
1497 */
1498 if (rtt == -1) {
1499 ilbd_set_fail_state(srv);
1500 return;
1501 } else if (rtt > 0) {
1502 /* If the returned RTT value is not valid, just ignore it. */
1503 if (rtt > 0 && rtt <= UINT_MAX) {
1504 /* Set rtt to be the simple smoothed average. */
1505 if (srv->shc_rtt == 0) {
1506 srv->shc_rtt = rtt;
1507 } else {
1508 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) +
1509 (rtt >> 2);
1510 }
1511 }
1512
1513 }
1514
1515 switch (srv->shc_state) {
1516 case ilbd_hc_def_pinging:
1517 srv->shc_state = ilbd_hc_probing;
1518
1519 /* Ping is OK, now start the probe. */
1520 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
1521 break;
1522 case ilbd_hc_probing:
1523 srv->shc_fail_cnt = 0;
1524
1525 /* Server is dead before, re-enable it. */
1526 if (srv->shc_status == ILB_HCS_UNREACH ||
1527 srv->shc_status == ILB_HCS_DEAD) {
1528 /*
1529 * If enabling the server in kernel fails now,
1530 * hopefully when the timer fires again later, the
1531 * enabling can be done.
1532 */
1533 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
1534 srv->shc_hc_rule->hcr_rule->irl_name,
1535 stat_declare_srv_alive) != ILB_STATUS_OK) {
1536 logerr("%s: cannot enable server in kernel: "
1537 " rule %s server %s", __func__,
1538 srv->shc_hc_rule->hcr_rule->irl_name,
1539 srv->shc_sg_srv->sgs_srvID);
1540 } else {
1541 srv->shc_status = ILB_HCS_ALIVE;
1542 }
1543 } else {
1544 srv->shc_status = ILB_HCS_ALIVE;
1545 }
1546 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
1547 logerr("%s: cannot restart timer: rule %s server %s",
1548 __func__, srv->shc_hc_rule->hcr_rule->irl_name,
1549 srv->shc_sg_srv->sgs_srvID);
1550 ilbd_mark_server_disabled(srv);
1551 }
1552 break;
1553 default:
1554 logdebug("%s: unknown state", __func__);
1555 break;
1556 }
1557 }
1558
1559 /*
1560 * Handle the return event of a child probe fd.
1561 */
1562 void
1563 ilbd_hc_probe_return(int ev_port, int fd, int port_events,
1564 ilbd_hc_probe_event_t *ev)
1565 {
1566 /*
1567 * Note that there can be more than one events delivered to us at
1568 * the same time. So we need to check them individually.
1569 */
1570 if (port_events & POLLRDNORM)
1571 ilbd_hc_child_data(fd, ev);
1572
1573 if (port_events & (POLLHUP|POLLERR)) {
1574 ilbd_hc_child_hup(ev_port, fd, ev);
1575 return;
1576 }
1577
1578 /*
1579 * Re-associate the fd with the port so that when the child
1580 * exits, we can reap the status.
1581 */
1582 reassociate_port(ev_port, fd, ev);
1583 }