Print this page
NEX-20098 idm_refcnt_unref_task() fails to hold mutex before calling REFCNT_AUDIT
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-9981 Deadman timer panic from idm_refcnt_wait_ref thread while offlining iSCSI targets
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-6018 Return of the walking dead idm_refcnt_wait_ref comstar threads
Reviewed by:  Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by:  Evan Layton <evan.layton@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/idm/idm.c
          +++ new/usr/src/uts/common/io/idm/idm.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
  23   24   */
  24   25  
  25   26  #include <sys/cpuvar.h>
  26   27  #include <sys/conf.h>
  27   28  #include <sys/file.h>
  28   29  #include <sys/ddi.h>
  29   30  #include <sys/sunddi.h>
  30   31  #include <sys/modctl.h>
  31   32  
  32   33  #include <sys/socket.h>
↓ open down ↓ 21 lines elided ↑ open up ↑
  54   55  };
  55   56  
  56   57  extern void idm_wd_thread(void *arg);
  57   58  
  58   59  static int _idm_init(void);
  59   60  static int _idm_fini(void);
  60   61  static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
  61   62  static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
  62   63  static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
  63   64  static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
  64      -static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
       65 +static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
  65   66      idm_abort_type_t abort_type);
  66   67  static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
  67   68  static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
  68   69      int sleepflag);
  69   70  
  70   71  boolean_t idm_conn_logging = 0;
  71   72  boolean_t idm_svc_logging = 0;
  72   73  #ifdef DEBUG
  73   74  boolean_t idm_pattern_checking = 1;
  74   75  #else
↓ open down ↓ 1440 lines elided ↑ open up ↑
1515 1516  {
1516 1517          idm_refcnt_hold(&idt->idt_refcnt);
1517 1518  }
1518 1519  
1519 1520  void
1520 1521  idm_task_rele(idm_task_t *idt)
1521 1522  {
1522 1523          idm_refcnt_rele(&idt->idt_refcnt);
1523 1524  }
1524 1525  
1525      -void
     1526 +stmf_status_t
1526 1527  idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1527 1528  {
1528 1529          idm_task_t      *task;
1529 1530          int             idx;
     1531 +        stmf_status_t   s = STMF_SUCCESS;
1530 1532  
1531 1533          /*
1532 1534           * Passing NULL as the task indicates that all tasks
1533 1535           * for this connection should be aborted.
1534 1536           */
1535 1537          if (idt == NULL) {
1536 1538                  /*
1537 1539                   * Only the connection state machine should ask for
1538 1540                   * all tasks to abort and this should never happen in FFP.
1539 1541                   */
↓ open down ↓ 1 lines elided ↑ open up ↑
1541 1543                  rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1542 1544                  for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1543 1545                          task = idm.idm_taskid_table[idx];
1544 1546                          if (task == NULL)
1545 1547                                  continue;
1546 1548                          mutex_enter(&task->idt_mutex);
1547 1549                          if ((task->idt_state != TASK_IDLE) &&
1548 1550                              (task->idt_state != TASK_COMPLETE) &&
1549 1551                              (task->idt_ic == ic)) {
1550 1552                                  rw_exit(&idm.idm_taskid_table_lock);
1551      -                                idm_task_abort_one(ic, task, abort_type);
     1553 +                                s = idm_task_abort_one(ic, task, abort_type);
1552 1554                                  rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1553 1555                          } else
1554 1556                                  mutex_exit(&task->idt_mutex);
1555 1557                  }
1556 1558                  rw_exit(&idm.idm_taskid_table_lock);
1557 1559          } else {
1558 1560                  mutex_enter(&idt->idt_mutex);
1559      -                idm_task_abort_one(ic, idt, abort_type);
     1561 +                s = idm_task_abort_one(ic, idt, abort_type);
1560 1562          }
     1563 +        return (s);
1561 1564  }
1562 1565  
1563 1566  static void
1564 1567  idm_task_abort_unref_cb(void *ref)
1565 1568  {
1566 1569          idm_task_t *idt = ref;
1567 1570  
1568 1571          mutex_enter(&idt->idt_mutex);
1569 1572          switch (idt->idt_state) {
1570 1573          case TASK_SUSPENDING:
↓ open down ↓ 10 lines elided ↑ open up ↑
1581 1584                  mutex_exit(&idt->idt_mutex);
1582 1585                  ASSERT(0);
1583 1586                  break;
1584 1587          }
1585 1588  }
1586 1589  
1587 1590  /*
1588 1591   * Abort the idm task.
1589 1592   *    Caller must hold the task mutex, which will be released before return
1590 1593   */
1591      -static void
     1594 +static stmf_status_t
1592 1595  idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1593 1596  {
     1597 +        stmf_status_t   s = STMF_SUCCESS;
     1598 +
1594 1599          /* Caller must hold the task mutex */
1595 1600          ASSERT(mutex_owned(&idt->idt_mutex));
1596 1601          switch (idt->idt_state) {
1597 1602          case TASK_ACTIVE:
1598 1603                  switch (abort_type) {
1599 1604                  case AT_INTERNAL_SUSPEND:
1600 1605                          /* Call transport to release any resources */
1601 1606                          idt->idt_state = TASK_SUSPENDING;
1602 1607                          mutex_exit(&idt->idt_mutex);
1603 1608                          ic->ic_transport_ops->it_free_task_rsrc(idt);
1604 1609  
1605 1610                          /*
1606 1611                           * Wait for outstanding references.  When all
1607 1612                           * references are released the callback will call
1608 1613                           * idm_task_aborted().
1609 1614                           */
1610 1615                          idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1611 1616                              &idm_task_abort_unref_cb);
1612      -                        return;
     1617 +                        return (s);
1613 1618                  case AT_INTERNAL_ABORT:
1614 1619                  case AT_TASK_MGMT_ABORT:
1615 1620                          idt->idt_state = TASK_ABORTING;
1616 1621                          mutex_exit(&idt->idt_mutex);
1617 1622                          ic->ic_transport_ops->it_free_task_rsrc(idt);
1618 1623  
1619 1624                          /*
1620 1625                           * Wait for outstanding references.  When all
1621 1626                           * references are released the callback will call
1622 1627                           * idm_task_aborted().
1623 1628                           */
1624 1629                          idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1625 1630                              &idm_task_abort_unref_cb);
1626      -                        return;
     1631 +                        return (s);
1627 1632                  default:
1628 1633                          ASSERT(0);
1629 1634                  }
1630 1635                  break;
1631 1636          case TASK_SUSPENDING:
1632 1637                  /* Already called transport_free_task_rsrc(); */
1633 1638                  switch (abort_type) {
1634 1639                  case AT_INTERNAL_SUSPEND:
1635 1640                          /* Already doing it */
1636 1641                          break;
↓ open down ↓ 19 lines elided ↑ open up ↑
1656 1661                          /*
1657 1662                           * We could probably call idm_task_aborted directly
1658 1663                           * here but we may be holding the conn lock. It's
1659 1664                           * easier to just switch contexts.  Even though
1660 1665                           * we shouldn't really have any references we'll
1661 1666                           * set the state to TASK_ABORTING instead of
1662 1667                           * TASK_ABORTED so we can use the same code path.
1663 1668                           */
1664 1669                          idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1665 1670                              &idm_task_abort_unref_cb);
1666      -                        return;
     1671 +                        return (s);
1667 1672                  default:
1668 1673                          ASSERT(0);
1669 1674                  }
1670 1675                  break;
1671 1676          case TASK_ABORTING:
1672 1677          case TASK_ABORTED:
1673 1678                  switch (abort_type) {
1674 1679                  case AT_INTERNAL_SUSPEND:
1675 1680                          /* We're already past this point... */
1676 1681                  case AT_INTERNAL_ABORT:
1677 1682                  case AT_TASK_MGMT_ABORT:
1678 1683                          /* Already doing it */
1679 1684                          break;
1680 1685                  default:
1681 1686                          ASSERT(0);
1682 1687                  }
1683 1688                  break;
1684 1689          case TASK_COMPLETE:
1685      -                /*
1686      -                 * In this case, let it go.  The status has already been
1687      -                 * sent (which may or may not get successfully transmitted)
1688      -                 * and we don't want to end up in a race between completing
1689      -                 * the status PDU and marking the task suspended.
1690      -                 */
     1690 +                idm_refcnt_wait_ref(&idt->idt_refcnt);
     1691 +                s = STMF_ABORT_SUCCESS;
1691 1692                  break;
1692 1693          default:
1693 1694                  ASSERT(0);
1694 1695          }
1695 1696          mutex_exit(&idt->idt_mutex);
     1697 +
     1698 +        return (s);
1696 1699  }
1697 1700  
1698 1701  static void
1699 1702  idm_task_aborted(idm_task_t *idt, idm_status_t status)
1700 1703  {
1701 1704          (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1702 1705  }
1703 1706  
1704 1707  /*
1705 1708   * idm_pdu_tx
↓ open down ↓ 442 lines elided ↑ open up ↑
2148 2151          refcnt->ir_refcnt++;
2149 2152          REFCNT_AUDIT(refcnt);
2150 2153          mutex_exit(&refcnt->ir_mutex);
2151 2154  }
2152 2155  
2153 2156  static void
2154 2157  idm_refcnt_unref_task(void *refcnt_void)
2155 2158  {
2156 2159          idm_refcnt_t *refcnt = refcnt_void;
2157 2160  
     2161 +        mutex_enter(&refcnt->ir_mutex);
2158 2162          REFCNT_AUDIT(refcnt);
     2163 +        mutex_exit(&refcnt->ir_mutex);
2159 2164          (*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2160 2165  }
2161 2166  
2162 2167  void
2163 2168  idm_refcnt_rele(idm_refcnt_t *refcnt)
2164 2169  {
2165 2170          mutex_enter(&refcnt->ir_mutex);
2166 2171          ASSERT(refcnt->ir_refcnt > 0);
2167 2172          refcnt->ir_refcnt--;
2168 2173          REFCNT_AUDIT(refcnt);
↓ open down ↓ 87 lines elided ↑ open up ↑
2256 2261  {
2257 2262          mutex_enter(&refcnt->ir_mutex);
2258 2263          if (refcnt->ir_refcnt == 0) {
2259 2264                  mutex_exit(&refcnt->ir_mutex);
2260 2265                  (*cb_func)(refcnt->ir_referenced_obj);
2261 2266                  return;
2262 2267          }
2263 2268          mutex_exit(&refcnt->ir_mutex);
2264 2269  }
2265 2270  
     2271 +/*
     2272 + * used to determine the status of the refcnt.
     2273 + *
     2274 + * if refcnt is 0 return is 0
     2275 + * if refcnt is negative return is -1
     2276 + * if refcnt > 0 and no waiters return is 1
     2277 + * if refcnt > 0 and waiters return is 2
     2278 + */
     2279 +int
     2280 +idm_refcnt_is_held(idm_refcnt_t *refcnt)
     2281 +{
     2282 +        if (refcnt->ir_refcnt < 0)
     2283 +                return (-1);
     2284 +
     2285 +        if (refcnt->ir_refcnt == 0)
     2286 +                return (0);
     2287 +
     2288 +        if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0)
     2289 +                return (1);
     2290 +
     2291 +        return (2);
     2292 +}
     2293 +
2266 2294  void
2267 2295  idm_conn_hold(idm_conn_t *ic)
2268 2296  {
2269 2297          idm_refcnt_hold(&ic->ic_refcnt);
2270 2298  }
2271 2299  
2272 2300  void
2273 2301  idm_conn_rele(idm_conn_t *ic)
2274 2302  {
2275 2303          idm_refcnt_rele(&ic->ic_refcnt);
↓ open down ↓ 147 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX