1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2015 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Shootdown processing logic.
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <sys/uuid.h>
  25 #include <assert.h>
  26 #include <strings.h>
  27 #include <errno.h>
  28 #include <sys/debug.h>
  29 
  30 #include <libvarpd_provider.h>
  31 #include <libvarpd_svp.h>
  32 
  33 /*
  34  * Polling intervals for the shootdown log. When the last pass found nothing to
  35  * do, we wait svp_shootdown_base seconds plus a random extra of up to
  36  * svp_shootdown_base seconds before asking again. However, if there is actually
  37  * some work going on, we just use the svp_shootdown_cont time instead.
  38  */
  39 static int svp_shootdown_base = 5;
  40 static int svp_shootdown_cont = 1;
  41 
  42 /*
  43  * This is the size of both our logack and logrm buffers. The sizing of the
  44  * shootdown buffer would give us approximately 18 or so VL3 entries and 32 VL2
  45  * entries or some combination thereof. While it's a bit of overkill, we just
  46  * use the same sized buffer for the list of uuids that we pass to remove log
  47  * entries that we've acted upon.
  48  */
  49 static int svp_shootdown_buf = 1024;
  50 
  51 static void
  52 svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont)
  53 {
  54         assert(MUTEX_HELD(&sdl->sdl_lock));
  55 
  56         if (cont == B_TRUE) {
  57                 sdl->sdl_timer.st_value = svp_shootdown_cont;
  58         } else {
  59                 sdl->sdl_timer.st_value = svp_shootdown_base +
  60                     arc4random_uniform(svp_shootdown_base + 1);
  61         }
  62         svp_timer_add(&sdl->sdl_timer);
  63 }
  64 
  65 void
  66 svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status)
  67 {
  68         svp_sdlog_t *sdl = &srp->sr_shoot;
  69 
  70         mutex_enter(&sdl->sdl_lock);
  71         sdl->sdl_flags &= ~SVP_SD_RUNNING;
  72         svp_shootdown_schedule(sdl, B_TRUE);
  73         mutex_exit(&sdl->sdl_lock);
  74 
  75         if (status != SVP_S_OK) {
  76                 (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed",
  77                     BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
  78                     BUNYAN_T_INT32, "remote_port", srp->sr_rport,
  79                     BUNYAN_T_INT32, "status", status,
  80                     BUNYAN_T_END);
  81         }
  82 }
  83 
  84 static void
  85 svp_shootdown_ref(svp_sdlog_t *sdl)
  86 {
  87         mutex_enter(&sdl->sdl_lock);
  88         sdl->sdl_ref++;
  89         mutex_exit(&sdl->sdl_lock);
  90 }
  91 
  92 static void
  93 svp_shootdown_rele(svp_sdlog_t *sdl)
  94 {
  95         svp_lrm_req_t *svrr = sdl->sdl_logrm;
  96         boolean_t next;
  97 
  98         mutex_enter(&sdl->sdl_lock);
  99         VERIFY(sdl->sdl_ref > 0);
 100         sdl->sdl_ref--;
 101         if (sdl->sdl_ref > 0) {
 102                 mutex_exit(&sdl->sdl_lock);
 103                 return;
 104         }
 105 
 106         /*
 107          * At this point we know that we hold the last reference, therefore it's
 108          * safe for us to go ahead and clean up and move on and attempt to
 109          * deliver the reply. We always deliver the reply by going through the
 110          * timer. This can be rather important as the final reference may be
 111          * coming through a failed query and it's not always safe for us to
 112          * callback into the remote routines from this context.
 113          *
 114          * We should only do this if we have a non-zero number of entries to
 115          * take down.
 116          */
 117         sdl->sdl_flags &= ~SVP_SD_RUNNING;
 118         if (svrr->svrr_count > 0) {
 119                 sdl->sdl_flags |= SVP_SD_DORM;
 120                 next = B_TRUE;
 121         } else {
 122                 next = B_FALSE;
 123         }
 124         svp_shootdown_schedule(sdl, next);
 125         mutex_exit(&sdl->sdl_lock);
 126 }
 127 
 128 /*
 129  * This is a callback used to indicate that the VL3 lookup has completed and an
 130  * entry, if any, has been injected. If the command succeeded, eg. we got that
 131  * the status was OK or that it was not found, then we will add it to he list to
 132  * shoot down. Otherwise, there's nothing else for us to really do here.
 133  */
 134 void
 135 svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl)
 136 {
 137         svp_lrm_req_t *svrr = sdl->sdl_logrm;
 138 
 139         mutex_enter(&sdl->sdl_lock);
 140         if (status == SVP_S_OK || status == SVP_S_NOTFOUND) {
 141                 bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16],
 142                     UUID_LEN);
 143                 svrr->svrr_count++;
 144         }
 145         mutex_exit(&sdl->sdl_lock);
 146 
 147         svp_shootdown_rele(sdl);
 148 }
 149 
 150 static int
 151 svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg)
 152 {
 153         svp_sdlog_t *sdl = arg;
 154         svp_remote_t *srp = sdl->sdl_remote;
 155         svp_lrm_req_t *svrr = sdl->sdl_logrm;
 156 
 157         if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3)
 158                 libvarpd_panic("encountered unknown type: %d\n", type);
 159 
 160         if (type == SVP_LOG_VL2) {
 161                 svp_log_vl2_t *svl2 = data;
 162                 svp_remote_shootdown_vl2(srp, svl2);
 163                 mutex_enter(&sdl->sdl_lock);
 164                 bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16],
 165                     UUID_LEN);
 166                 svrr->svrr_count++;
 167                 mutex_exit(&sdl->sdl_lock);
 168         } else {
 169                 svp_log_vl3_t *svl3 = data;
 170 
 171                 /* Take a hold for the duration of this request */
 172                 svp_shootdown_ref(sdl);
 173                 svp_remote_shootdown_vl3(srp, svl3, sdl);
 174         }
 175 
 176         return (0);
 177 }
 178 
 179 static int
 180 svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg)
 181 {
 182         uint_t *u = arg;
 183         *u = *u + 1;
 184         return (0);
 185 }
 186 
 187 
 188 static int
 189 svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len,
 190     int (*cb)(void *, svp_log_type_t, void *), void *arg)
 191 {
 192         int ret;
 193         off_t cboff = 0;
 194         uint32_t *typep, type;
 195         svp_log_vl2_t *svl2;
 196         svp_log_vl3_t *svl3;
 197 
 198         /* Adjust for initial status word */
 199         assert(len >= sizeof (uint32_t));
 200         len -= sizeof (uint32_t);
 201         cboff += sizeof (uint32_t);
 202 
 203         while (len > 0) {
 204                 size_t opsz;
 205 
 206                 if (len < sizeof (uint32_t)) {
 207                         (void) bunyan_warn(svp_bunyan,
 208                             "failed to get initial shootdown tag",
 209                             BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
 210                             BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 211                             BUNYAN_T_INT32, "response_size", cboff + len,
 212                             BUNYAN_T_INT32, "response_offset", cboff,
 213                             BUNYAN_T_END);
 214                         return (-1);
 215                 }
 216 
 217                 typep = buf + cboff;
 218                 type = ntohl(*typep);
 219                 if (type == SVP_LOG_VL2) {
 220                         opsz = sizeof (svp_log_vl2_t);
 221                         if (len < opsz) {
 222                                 (void) bunyan_warn(svp_bunyan,
 223                                     "not enough data for svp_log_vl2_t",
 224                                     BUNYAN_T_STRING, "remote_host",
 225                                     srp->sr_hostname,
 226                                     BUNYAN_T_INT32, "remote_port",
 227                                     srp->sr_rport,
 228                                     BUNYAN_T_INT32, "response_size",
 229                                     cboff + len,
 230                                     BUNYAN_T_INT32, "response_offset", cboff,
 231                                     BUNYAN_T_END);
 232                                 return (-1);
 233                         }
 234                         svl2 = (void *)typep;
 235                         if ((ret = cb(svl2, type, arg)) != 0)
 236                                 return (ret);
 237                 } else if (type == SVP_LOG_VL3) {
 238 
 239                         opsz = sizeof (svp_log_vl3_t);
 240                         if (len < opsz) {
 241                                 (void) bunyan_warn(svp_bunyan,
 242                                     "not enough data for svp_log_vl3_t",
 243                                     BUNYAN_T_STRING, "remote_host",
 244                                     srp->sr_hostname,
 245                                     BUNYAN_T_INT32, "remote_port",
 246                                     srp->sr_rport,
 247                                     BUNYAN_T_INT32, "response_size",
 248                                     cboff + len,
 249                                     BUNYAN_T_INT32, "response_offset", cboff,
 250                                     BUNYAN_T_END);
 251                                 return (-1);
 252                         }
 253                         svl3 = (void *)typep;
 254                         if ((ret = cb(svl3, type, arg)) != 0)
 255                                 return (ret);
 256                 } else {
 257                         (void) bunyan_warn(svp_bunyan,
 258                             "unknown log structure type",
 259                             BUNYAN_T_STRING, "remote_host",
 260                             srp->sr_hostname,
 261                             BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 262                             BUNYAN_T_INT32, "response_size", cboff + len,
 263                             BUNYAN_T_INT32, "response_offset", cboff,
 264                             BUNYAN_T_INT32, "structure_type", type,
 265                             BUNYAN_T_END);
 266                         return (-1);
 267                 }
 268                 len -= opsz;
 269                 cboff += opsz;
 270         }
 271 
 272         return (0);
 273 }
 274 
 275 void
 276 svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata,
 277     size_t cbsize)
 278 {
 279         uint_t count;
 280         svp_sdlog_t *sdl = &srp->sr_shoot;
 281 
 282         if (status != SVP_S_OK) {
 283                 (void) bunyan_warn(svp_bunyan,
 284                     "log request not OK",
 285                     BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
 286                     BUNYAN_T_INT32, "remote_port", srp->sr_rport,
 287                     BUNYAN_T_INT32, "response_size", cbsize,
 288                     BUNYAN_T_INT32, "status", status,
 289                     BUNYAN_T_END);
 290                 mutex_enter(&sdl->sdl_lock);
 291                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 292                 svp_shootdown_schedule(sdl, B_FALSE);
 293                 mutex_exit(&sdl->sdl_lock);
 294                 return;
 295         }
 296 
 297         /*
 298          * First go ahead and count the number of entries. This effectively
 299          * allows us to validate that all the data is valid, if this fails, then
 300          * we fail the request.
 301          */
 302         count = 0;
 303         if ((svp_shootdown_logr_iter(srp, cbdata, cbsize,
 304             svp_shootdown_logr_count, &count)) != 0) {
 305                 mutex_enter(&sdl->sdl_lock);
 306                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 307                 svp_shootdown_schedule(sdl, B_FALSE);
 308                 mutex_exit(&sdl->sdl_lock);
 309                 return;
 310         }
 311 
 312         /*
 313          * If we have no entries, then we're also done.
 314          */
 315         if (count == 0) {
 316                 mutex_enter(&sdl->sdl_lock);
 317                 sdl->sdl_flags &= ~SVP_SD_RUNNING;
 318                 svp_shootdown_schedule(sdl, B_FALSE);
 319                 mutex_exit(&sdl->sdl_lock);
 320                 return;
 321         }
 322 
 323         /*
 324          * We have work to do. Because we may have asynchronous VL3 tasks, we're
 325          * going to first grab a reference before we do the iteration. Then, for
 326          * each asynchronous VL3 request we make, that'll also grab a hold. Once
 327          * we're done with the iteration, we'll drop our hold. If that's the
 328          * last one, it'll move on accordingly.
 329          */
 330         svp_shootdown_ref(sdl);
 331         bzero(sdl->sdl_logrm, svp_shootdown_buf);
 332 
 333         /*
 334          * If this fails, we're going to determine what to do next based on the
 335          * number of entries that were entered into the log removal. At this
 336          * point success or failure don't really look different, all it changes
 337          * is how many entries we have to remove.
 338          */
 339         (void) svp_shootdown_logr_iter(srp, cbdata, cbsize,
 340             svp_shootdown_logr_shoot, sdl);
 341 
 342         /*
 343          * Now that we're done with our work, release the hold. If we don't have
 344          * any vl3 tasks outstanding, this'll trigger the next phase of the log
 345          * removals.
 346          */
 347         svp_shootdown_rele(sdl);
 348 }
 349 
 350 static void
 351 svp_shootdown_timer(void *arg)
 352 {
 353         svp_sdlog_t *sdl = arg;
 354         svp_remote_t *srp = sdl->sdl_remote;
 355         boolean_t init = B_TRUE;
 356 
 357         mutex_enter(&sdl->sdl_lock);
 358 
 359         /*
 360          * If we've been asked to quiesce, we're done.
 361          */
 362         if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) {
 363                 mutex_exit(&sdl->sdl_lock);
 364                 return;
 365         }
 366 
 367         /*
 368          * We shouldn't be able to have ourselves currently be running and reach
 369          * here. If that's the case, we should immediately panic.
 370          */
 371         if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) {
 372                 libvarpd_panic("remote %p shootdown timer fired while still "
 373                     "running", srp);
 374         }
 375 
 376         if ((sdl->sdl_flags & SVP_SD_DORM) != 0) {
 377                 sdl->sdl_flags &= ~SVP_SD_DORM;
 378                 init = B_FALSE;
 379         }
 380 
 381         sdl->sdl_flags |= SVP_SD_RUNNING;
 382         mutex_exit(&sdl->sdl_lock);
 383 
 384         if (init == B_FALSE) {
 385                 svp_lrm_req_t *svrr = sdl->sdl_logrm;
 386 
 387                 bzero(&sdl->sdl_query, sizeof (svp_query_t));
 388                 svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr,
 389                     sizeof (*svrr) + 16 * svrr->svrr_count);
 390         } else {
 391                 bzero(&sdl->sdl_query, sizeof (svp_query_t));
 392                 svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack,
 393                     svp_shootdown_buf);
 394         }
 395 }
 396 
 397 void
 398 svp_shootdown_fini(svp_remote_t *srp)
 399 {
 400         svp_sdlog_t *sdl = &srp->sr_shoot;
 401 
 402         mutex_enter(&sdl->sdl_lock);
 403         sdl->sdl_flags |= SVP_SD_QUIESCE;
 404         mutex_exit(&sdl->sdl_lock);
 405 
 406         svp_timer_remove(&sdl->sdl_timer);
 407 
 408         mutex_enter(&sdl->sdl_lock);
 409 
 410         /*
 411          * Normally svp_timer_remove would be enough. However, the query could
 412          * have been put out again outside of the svp_timer interface. Therefore
 413          * we still need to check for SVP_SD_RUNNING.
 414          */
 415         while (sdl->sdl_flags & SVP_SD_RUNNING)
 416                 (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock);
 417         mutex_exit(&sdl->sdl_lock);
 418 
 419         umem_free(sdl->sdl_logack, svp_shootdown_buf);
 420         umem_free(sdl->sdl_logrm, svp_shootdown_buf);
 421         sdl->sdl_logack = NULL;
 422         sdl->sdl_logrm = NULL;
 423         (void) cond_destroy(&sdl->sdl_cond);
 424         (void) mutex_destroy(&sdl->sdl_lock);
 425 }
 426 
 427 void
 428 svp_shootdown_start(svp_remote_t *srp)
 429 {
 430         svp_sdlog_t *sdl = &srp->sr_shoot;
 431 
 432         mutex_enter(&sdl->sdl_lock);
 433         svp_shootdown_schedule(sdl, B_FALSE);
 434         mutex_exit(&sdl->sdl_lock);
 435 }
 436 
 437 int
 438 svp_shootdown_init(svp_remote_t *srp)
 439 {
 440         int ret;
 441         svp_sdlog_t *sdl = &srp->sr_shoot;
 442         if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 443             NULL)) != 0)
 444                 return (ret);
 445 
 446         if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) {
 447                 (void) mutex_destroy(&sdl->sdl_lock);
 448                 return (ret);
 449         }
 450 
 451         if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
 452             NULL) {
 453                 ret = errno;
 454                 (void) cond_destroy(&sdl->sdl_cond);
 455                 (void) mutex_destroy(&sdl->sdl_lock);
 456                 return (ret);
 457         }
 458 
 459         if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
 460             NULL) {
 461                 ret = errno;
 462                 umem_free(sdl->sdl_logack, svp_shootdown_buf);
 463                 (void) cond_destroy(&sdl->sdl_cond);
 464                 (void) mutex_destroy(&sdl->sdl_lock);
 465                 return (ret);
 466         }
 467 
 468         sdl->sdl_remote = srp;
 469         sdl->sdl_timer.st_oneshot = B_TRUE;
 470         sdl->sdl_timer.st_func = svp_shootdown_timer;
 471         sdl->sdl_timer.st_arg = sdl;
 472 
 473         return (0);
 474 }