1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2015 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Shootdown processing logic.
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <sys/uuid.h>
  25 #include <assert.h>
  26 #include <strings.h>
  27 #include <errno.h>
  28 #include <sys/debug.h>
  29 
  30 #include <libvarpd_provider.h>
  31 #include <libvarpd_svp.h>
  32 
  33 /*
  34  * When we've determined that there's nothing left for us to do, then we go
  35  * ahead and wait svp_shootdown_base seconds + up to an additional
  36  * svp_shootdown_base seconds before asking again. However, if there is actually
  37  * some work going on, just use the svp_shootdown_cont time.
  38  */
  39 static int svp_shootdown_base = 5;	/* idle wait: base plus random 0..base */
  40 static int svp_shootdown_cont = 1;	/* timer value used while work is pending */
  41 
  42 /*
  43  * These are sizes for our logack and logrm buffers. The sizing of the shootdown
  44  * buffer would give us approximately 18 or so VL3 entries and 32 VL2 entries
  45  * or some combination thereof. While it's a bit of overkill, we just use the
  46  * same sized buffer for the list of uuids that we pass to remove log entries
  47  * that we've acted upon.
  48  */
  49 static int svp_shootdown_buf = 1024;	/* size passed to umem_alloc for sdl_logack and sdl_logrm */
  50 
  51 static void
  52 svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont)
  53 {
  54         assert(MUTEX_HELD(&sdl->sdl_lock));
  55 
  56         if (cont == B_TRUE) {
  57                 sdl->sdl_timer.st_value = svp_shootdown_cont;
  58         } else {
  59                 sdl->sdl_timer.st_value = svp_shootdown_base +
  60                     arc4random_uniform(svp_shootdown_base + 1);
  61         }
  62         svp_timer_add(&sdl->sdl_timer);
  63 }
  64 
  65 void
  66 svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status)
  67 {
  68         svp_sdlog_t *sdl = &srp->sr_shoot;
  69 
  70         mutex_enter(&sdl->sdl_lock);
  71         sdl->sdl_flags &= ~SVP_SD_RUNNING;
  72         svp_shootdown_schedule(sdl, B_TRUE);
  73         mutex_exit(&sdl->sdl_lock);
  74 
  75         if (status != SVP_S_OK) {
  76                 (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed",
  77                     BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
  78                     BUNYAN_T_INT32, "remote_port", srp->sr_rport,
  79                     BUNYAN_T_INT32, "status", status,
  80                     BUNYAN_T_END);
  81         }
  82 }
  83 
  84 static void
  85 svp_shootdown_ref(svp_sdlog_t *sdl)
  86 {
  87         mutex_enter(&sdl->sdl_lock);
  88         sdl->sdl_ref++;
  89         mutex_exit(&sdl->sdl_lock);
  90 }
  91 
  92 static void
  93 svp_shootdown_rele(svp_sdlog_t *sdl)
  94 {
  95         svp_lrm_req_t *svrr = sdl->sdl_logrm;
  96         boolean_t next;
  97 
  98         mutex_enter(&sdl->sdl_lock);
  99         VERIFY(sdl->sdl_ref > 0);
 100         sdl->sdl_ref--;
 101         if (sdl->sdl_ref > 0) {
 102                 mutex_exit(&sdl->sdl_lock);
 103                 return;
 104         }
 105 
 106         /*
 107          * At this point we know that we hold the last reference, therefore it's
 108          * safe for us to go ahead and clean up and move on and attempt to
 109          * deliver the reply. We always deliver the reply by going through the
 110          * timer. This can be rather important as the final reference may be
 111          * coming through a failed query and it's not always safe for us to
 112          * callback into the remote routines from this context.
 113          *
 114          * We should only do this if we have a non-zero number of entries to
 115          * take down.
 116          */
 117         sdl->sdl_flags &= ~SVP_SD_RUNNING;
 118         if (svrr->svrr_count > 0) {
 119                 sdl->sdl_flags |= SVP_SD_DORM;
 120                 next = B_TRUE;
 121         } else {
 122                 next = B_FALSE;
 123         }
 124         svp_shootdown_schedule(sdl, next);
 125         mutex_exit(&sdl->sdl_lock);
 126 }
 127 
 128 /*
 129  * This is a callback used to indicate that the VL3 lookup has completed and an
 130  * entry, if any, has been injected. If the command succeeded, eg. we got that
 131  * the status was OK or that it was not found, then we will add it to he list to
 132  * shoot down. Otherwise, there's nothing else for us to really do here.
 133  */
 134 void
 135 svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl)
 136 {
 137         svp_lrm_req_t *svrr = sdl->sdl_logrm;
 138 
 139         mutex_enter(&sdl->sdl_lock);
 140         if (status == SVP_S_OK || status == SVP_S_NOTFOUND) {
 141                 bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16],
 142                     UUID_LEN);
 143                 svrr->svrr_count++;
 144         }
 145         mutex_exit(&sdl->sdl_lock);
 146 
 147         svp_shootdown_rele(sdl);
 148 }
 149 
 150 static int
 151 svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg)
 152 {
 153         svp_sdlog_t *sdl = arg;
 154         svp_remote_t *srp = sdl->sdl_remote;
 155         svp_lrm_req_t *svrr = sdl->sdl_logrm;
 156 
 157         if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3 && type != SVP_LOG_ROUTE)
 158                 libvarpd_panic("encountered unknown type: %d\n", type);
 159 
 160         if (type == SVP_LOG_VL2) {
 161                 svp_log_vl2_t *svl2 = data;
 162                 svp_remote_shootdown_vl2(srp, svl2);
 163                 mutex_enter(&sdl->sdl_lock);
 164                 bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16],
 165                     UUID_LEN);
 166                 svrr->svrr_count++;
 167                 mutex_exit(&sdl->sdl_lock);
 168         } else if (type == SVP_LOG_VL3) {
 169                 svp_log_vl3_t *svl3 = data;
 170 
 171                 /* Take a hold for the duration of this request */
 172                 svp_shootdown_ref(sdl);
 173                 svp_remote_shootdown_vl3(srp, svl3, sdl);
 174         } else {
 175                 svp_log_route_t *svlr = data;
 176 
 177                 svp_remote_shootdown_route(srp, svlr);
 178                 mutex_enter(&sdl->sdl_lock);
 179                 bcopy(svlr->svlr_id, &svrr->svrr_ids[svrr->svrr_count * 16],
 180                     UUID_LEN);
 181                 svrr->svrr_count++;
 182                 mutex_exit(&sdl->sdl_lock);
 183         }
 184 
 185         return (0);
 186 }
 187 
 188 static int
 189 svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg)
 190 {
 191         uint_t *u = arg;
 192         *u = *u + 1;
 193         return (0);
 194 }
 195 
 196 
 197 static int
 198 svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len,
 199     int (*cb)(void *, svp_log_type_t, void *), void *arg, uint16_t version)
 200 {
 201         int ret;
 202         off_t cboff = 0;
 203         uint32_t *typep, type;
 204         svp_log_vl2_t *svl2;
 205         svp_log_vl3_t *svl3;
 206 
 207         /* Adjust for initial status word */
 208         assert(len >= sizeof (uint32_t));
 209         len -= sizeof (uint32_t);
 210         cboff += sizeof (uint32_t);
 211 
 212         while (len > 0) {
 213                 size_t opsz;
 214                 char *typestring;
 215 
 216                 if (len < sizeof (uint32_t)) {
 217                         (void) bunyan_warn(svp_bunyan,
 218                             "failed to get initial shootdown tag",
 219                             BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
 220                             BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 221                             BUNYAN_T_INT32, "response_size", cboff + len,
 222                             BUNYAN_T_INT32, "response_offset", cboff,
 223                             BUNYAN_T_END);
 224                         return (-1);
 225                 }
 226 
 227                 typep = buf + cboff;
 228                 type = ntohl(*typep);
 229                 switch (type) {
 230                 case SVP_LOG_VL2:
 231                         opsz = sizeof (svp_log_vl2_t);
 232                         typestring = "svp_log_vl2_t";
 233                         break;
 234                 case SVP_LOG_VL3:
 235                         opsz = sizeof (svp_log_vl3_t);
 236                         typestring = "svp_log_vl3_t";
 237                         break;
 238                 case SVP_LOG_ROUTE:
 239                         if (version < SVP_VERSION_TWO) {
 240                                 (void) bunyan_warn(svp_bunyan,
 241                                     "insufficient version for SVP_LOG_ROUTE",
 242                                     BUNYAN_T_UINT32, "version", version,
 243                                     BUNYAN_T_STRING, "remote_host",
 244                                     srp->sr_hostname,
 245                                     BUNYAN_T_INT32, "remote_port",
 246                                     srp->sr_rport,
 247                                     BUNYAN_T_INT32, "response_size",
 248                                     cboff + len,
 249                                     BUNYAN_T_INT32, "response_offset", cboff,
 250                                     BUNYAN_T_END);
 251                                 return (-1);
 252                         }
 253                         opsz = sizeof (svp_log_route_t);
 254                         typestring = "svp_log_route_t";
 255                         break;
 256                 default:
 257                         (void) bunyan_warn(svp_bunyan,
 258                             "unknown log structure type",
 259                             BUNYAN_T_STRING, "remote_host",
 260                             srp->sr_hostname,
 261                             BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 262                             BUNYAN_T_INT32, "response_size", cboff + len,
 263                             BUNYAN_T_INT32, "response_offset", cboff,
 264                             BUNYAN_T_INT32, "structure_type", type,
 265                             BUNYAN_T_END);
 266                         return (-1);
 267                 }
 268                 if (len < opsz) {
 269                         (void) bunyan_warn(svp_bunyan,
 270                             "not enough data for",
 271                             BUNYAN_T_STRING, "", typestring,
 272                             BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
 273                             BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 274                             BUNYAN_T_INT32, "response_size", cboff + len,
 275                             BUNYAN_T_INT32, "response_offset", cboff,
 276                             BUNYAN_T_END);
 277                         return (-1);
 278                 }
 279                 if ((ret = cb((void *)typep, type, arg)) != 0)
 280                         return (ret);
 281 
 282                 len -= opsz;
 283                 cboff += opsz;
 284         }
 285 
 286         return (0);
 287 }
 288 
 289 void
 290 svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata,
 291     size_t cbsize, uint16_t version)
 292 {
 293         uint_t count;
 294         svp_sdlog_t *sdl = &srp->sr_shoot;
 295 
 296         if (status != SVP_S_OK) {
 297                 (void) bunyan_warn(svp_bunyan,
 298                     "log request not OK",
 299                     BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
 300                     BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 301                     BUNYAN_T_INT32, "response_size", cbsize,
 302                     BUNYAN_T_INT32, "status", status,
 303                     BUNYAN_T_END);
 304                 mutex_enter(&sdl->sdl_lock);
 305                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 306                 svp_shootdown_schedule(sdl, B_FALSE);
 307                 mutex_exit(&sdl->sdl_lock);
 308                 return;
 309         }
 310 
 311         /*
 312          * First go ahead and count the number of entries. This effectively
 313          * allows us to validate that all the data is valid, if this fails, then
 314          * we fail the request.
 315          */
 316         count = 0;
 317         if ((svp_shootdown_logr_iter(srp, cbdata, cbsize,
 318                 svp_shootdown_logr_count, &count, version)) != 0) {
 319                 mutex_enter(&sdl->sdl_lock);
 320                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 321                 svp_shootdown_schedule(sdl, B_FALSE);
 322                 mutex_exit(&sdl->sdl_lock);
 323                 return;
 324         }
 325 
 326         /*
 327          * If we have no entries, then we're also done.
 328          */
 329         if (count == 0) {
 330                 mutex_enter(&sdl->sdl_lock);
 331                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 332                 svp_shootdown_schedule(sdl, B_FALSE);
 333                 mutex_exit(&sdl->sdl_lock);
 334                 return;
 335         }
 336 
 337         /*
 338          * We have work to do. Because we may have asynchronous VL3 tasks, we're
 339          * going to first grab a reference before we do the iteration. Then, for
 340          * each asynchronous VL3 request we make, that'll also grab a hold. Once
 341          * we're done with the iteration, we'll drop our hold. If that's the
 342          * last one, it'll move on accordingly.
 343          */
 344         svp_shootdown_ref(sdl);
 345         bzero(sdl->sdl_logrm, svp_shootdown_buf);
 346 
 347         /*
 348          * If this fails, we're going to determine what to do next based on the
 349          * number of entries that were entered into the log removal. At this
 350          * point success or failure don't really look different, all it changes
 351          * is how many entries we have to remove.
 352          */
 353         (void) svp_shootdown_logr_iter(srp, cbdata, cbsize,
 354             svp_shootdown_logr_shoot, sdl, version);
 355 
 356         /*
 357          * Now that we're done with our work, release the hold. If we don't have
 358          * any vl3 tasks outstanding, this'll trigger the next phase of the log
 359          * removals.
 360          */
 361         svp_shootdown_rele(sdl);
 362 }
 363 
 364 static void
 365 svp_shootdown_timer(void *arg)
 366 {
 367         svp_sdlog_t *sdl = arg;
 368         svp_remote_t *srp = sdl->sdl_remote;
 369         boolean_t init = B_TRUE;
 370 
 371         mutex_enter(&sdl->sdl_lock);
 372 
 373         /*
 374          * If we've been asked to quiesce, we're done.
 375          */
 376         if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) {
 377                 mutex_exit(&sdl->sdl_lock);
 378                 return;
 379         }
 380 
 381         /*
 382          * We shouldn't be able to have ourselves currently be running and reach
 383          * here. If that's the case, we should immediately panic.
 384          */
 385         if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) {
 386                 libvarpd_panic("remote %p shootdown timer fired while still "
 387                     "running", srp);
 388         }
 389 
 390         if ((sdl->sdl_flags & SVP_SD_DORM) != 0) {
 391                 sdl->sdl_flags &= ~SVP_SD_DORM;
 392                 init = B_FALSE;
 393         }
 394 
 395         sdl->sdl_flags |= SVP_SD_RUNNING;
 396         mutex_exit(&sdl->sdl_lock);
 397 
 398         if (init == B_FALSE) {
 399                 svp_lrm_req_t *svrr = sdl->sdl_logrm;
 400 
 401                 bzero(&sdl->sdl_query, sizeof (svp_query_t));
 402                 svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr,
 403                     sizeof (*svrr) + 16 * svrr->svrr_count);
 404         } else {
 405                 bzero(&sdl->sdl_query, sizeof (svp_query_t));
 406                 svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack,
 407                     svp_shootdown_buf);
 408         }
 409 }
 410 
 411 void
 412 svp_shootdown_fini(svp_remote_t *srp)
 413 {
 414         svp_sdlog_t *sdl = &srp->sr_shoot;
 415 
 416         mutex_enter(&sdl->sdl_lock);
 417         sdl->sdl_flags |= SVP_SD_QUIESCE;
 418         mutex_exit(&sdl->sdl_lock);
 419 
 420         svp_timer_remove(&sdl->sdl_timer);
 421 
 422         mutex_enter(&sdl->sdl_lock);
 423 
 424         /*
 425          * Normally svp_timer_remove would be enough. However, the query could
 426          * have been put out again outside of the svp_timer interface. Therefore
 427          * we still need to check for SVP_SD_RUNNING.
 428          */
 429         while (sdl->sdl_flags & SVP_SD_RUNNING)
 430                 (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock);
 431         mutex_exit(&sdl->sdl_lock);
 432 
 433         umem_free(sdl->sdl_logack, svp_shootdown_buf);
 434         umem_free(sdl->sdl_logrm, svp_shootdown_buf);
 435         sdl->sdl_logack = NULL;
 436         sdl->sdl_logrm = NULL;
 437         (void) cond_destroy(&sdl->sdl_cond);
 438         (void) mutex_destroy(&sdl->sdl_lock);
 439 }
 440 
 441 void
 442 svp_shootdown_start(svp_remote_t *srp)
 443 {
 444         svp_sdlog_t *sdl = &srp->sr_shoot;
 445 
 446         mutex_enter(&sdl->sdl_lock);
 447         svp_shootdown_schedule(sdl, B_FALSE);
 448         mutex_exit(&sdl->sdl_lock);
 449 }
 450 
 451 int
 452 svp_shootdown_init(svp_remote_t *srp)
 453 {
 454         int ret;
 455         svp_sdlog_t *sdl = &srp->sr_shoot;
 456         if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 457             NULL)) != 0)
 458                 return (ret);
 459 
 460         if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) {
 461                 (void) mutex_destroy(&sdl->sdl_lock);
 462                 return (ret);
 463         }
 464 
 465         if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
 466             NULL) {
 467                 ret = errno;
 468                 (void) cond_destroy(&sdl->sdl_cond);
 469                 (void) mutex_destroy(&sdl->sdl_lock);
 470                 return (ret);
 471         }
 472 
 473         if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
 474             NULL) {
 475                 ret = errno;
 476                 umem_free(sdl->sdl_logack, svp_shootdown_buf);
 477                 (void) cond_destroy(&sdl->sdl_cond);
 478                 (void) mutex_destroy(&sdl->sdl_lock);
 479                 return (ret);
 480         }
 481 
 482         sdl->sdl_remote = srp;
 483         sdl->sdl_timer.st_oneshot = B_TRUE;
 484         sdl->sdl_timer.st_func = svp_shootdown_timer;
 485         sdl->sdl_timer.st_arg = sdl;
 486 
 487         return (0);
 488 }