Print this page
OS-7088 cyclics corked on overlay socket with full queue
        
@@ -205,10 +205,71 @@
 
         return (B_TRUE);
 }
 
 /*
+ * Kernel socket callback to indicate the socket itself is able to send
+ * data again.  Check for devices on this mux that were send-blocked,
+ * and clear them.
+ *
+ * "arg" is the overlay_mux_t that owns "ksock", and "event" must be
+ * KSOCKET_EV_CANSEND (both are ASSERTed below).  "ignore_me" is unused.
+ *
+ * Locking: omux_lock is held across the walk of omux_devices, and each
+ * device's odd_lock is taken briefly inside that walk.  Both are dropped
+ * before any mac_tx_update() call is made.
+ *
+ * NOTE(review): the scratch array is allocated KM_NOSLEEP and the result is
+ * VERIFY()ed, so an allocation failure here is treated as fatal rather than
+ * recovered from -- confirm that is acceptable for this callback.
+ */
+/* ARGSUSED */
+static void
+overlay_mux_cansend_now(ksocket_t ksock, ksocket_callback_event_t event,
+    void *arg, uintptr_t ignore_me)
+{
+        overlay_mux_t *mux = (overlay_mux_t *)arg;
+        overlay_dev_t *odd;
+        mac_handle_t *mhs_to_update, *current_mh;
+        size_t allocsize;
+
+        ASSERT3P(ksock, ==, mux->omux_ksock);
+        ASSERT3U(event, ==, KSOCKET_EV_CANSEND);
+
+        /*
+         * Traverse omux_devices and check for ones marked as send-blocked.
+         * Their MAC handles are gathered into a scratch array with room for
+         * omux_count entries, with "current_mh" pointing at the next free
+         * slot.
+         *
+         * NOTE(review): this assumes omux_devices never holds more than
+         * omux_count devices -- confirm against the code that maintains
+         * both.
+         */
+        mutex_enter(&mux->omux_lock);
+        if (mux->omux_count == 0) {
+                /* Nothing to wake up, and nothing was allocated yet. */
+                mutex_exit(&mux->omux_lock);
+                return;
+        }
+        allocsize = sizeof (mac_handle_t) * mux->omux_count;
+        mhs_to_update = kmem_zalloc(allocsize, KM_NOSLEEP);
+        VERIFY(mhs_to_update != NULL);  /* Failure should be rare. */
+        current_mh = mhs_to_update;
+
+        for (odd = avl_first(&mux->omux_devices); odd != NULL;
+            odd = AVL_NEXT(&mux->omux_devices, odd)) {
+                mac_handle_t odd_mh = NULL;
+
+                mutex_enter(&odd->odd_lock);
+                if ((odd->odd_flags & OVERLAY_F_TXSTOPPED) != 0) {
+                        /*
+                         * Get ready to tell MAC it can transmit again.  The
+                         * flag is cleared here, under odd_lock; the MAC
+                         * notification itself is deferred until after
+                         * omux_lock has been dropped.
+                         */
+                        odd->odd_flags &= ~OVERLAY_F_TXSTOPPED;
+                        odd_mh = odd->odd_mh;
+                }
+                mutex_exit(&odd->odd_lock);
+                if (odd_mh != NULL) {
+                        *current_mh = odd_mh;
+                        current_mh++;
+                }
+        }
+        mutex_exit(&mux->omux_lock);
+
+        /*
+         * Yes, I'm using the value-then-decrement.  "current_mh" is
+         * guaranteed to be at least one ahead of mhs_to_update if there are
+         * any mac handles that need updating, so the loop visits the
+         * recorded handles from last to first and does not run at all when
+         * nothing was recorded.  I also have to do this outside the omux
+         * lock because the tx_update may trigger immediate or concurrent
+         * packet transmission.
+         */
+        while (current_mh-- != mhs_to_update)
+                mac_tx_update(*current_mh);
+
+        kmem_free(mhs_to_update, allocsize);
+}
+
+/*
  * Register a given device with a socket backend. If no such device socket
  * exists, create a new one.
  */
 overlay_mux_t *
 overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol,
@@ -215,10 +276,11 @@
     struct sockaddr *addr, socklen_t len, int *errp)
 {
         int err;
         overlay_mux_t *mux;
         ksocket_t ksock;
+        ksocket_callbacks_t ks_cb = { 0 };
 
         if (errp == NULL)
                 errp = &err;
 
         mutex_enter(&overlay_mux_lock);
@@ -288,11 +350,40 @@
         mux->omux_count = 1;
         avl_create(&mux->omux_devices, overlay_mux_comparator,
             sizeof (overlay_dev_t), offsetof(overlay_dev_t, odd_muxnode));
         mutex_init(&mux->omux_lock, NULL, MUTEX_DRIVER, NULL);
 
+#if defined(OVERLAY_PINCH) || defined(OVERLAY_FC_TEST)
+        /* Set the xmit buf to a REALLY SMALL value, say 12k (1-3 packets) */
+        int bufsize = 12 * 1024;
 
+        if (ksocket_setsockopt(ksock, SOL_SOCKET, SO_SNDBUF,
+                (const void *)&bufsize, sizeof (bufsize), CRED()) != 0) {
+                ksocket_close(ksock, kcred);
+                mutex_destroy(&mux->omux_lock);
+                avl_destroy(&mux->omux_devices);
+                kmem_free(mux->omux_addr, len);
+                kmem_free(mux, sizeof (overlay_mux_t));
+                return (NULL);
+        }
+#endif
+        /*
+         * Set a callback in case we hit socket flow control and need to know
+         * when it's ready to send again.  See the aforementioned
+         * ksocket_socket() comments about the use of kcred vs. being
+         * zone-aware.
+         */
+        ks_cb.ksock_cb_cansend = overlay_mux_cansend_now;
+        if (ksocket_setcallbacks(ksock, &ks_cb, mux, kcred) != 0) {
+                ksocket_close(ksock, kcred);
+                mutex_destroy(&mux->omux_lock);
+                avl_destroy(&mux->omux_devices);
+                kmem_free(mux->omux_addr, len);
+                kmem_free(mux, sizeof (overlay_mux_t));
+                return (NULL);
+        }
+
         /* Once this is called, we need to expect to rx data */
         *errp = ksocket_krecv_set(ksock, overlay_mux_recv, mux);
         if (*errp != 0) {
                 ksocket_close(ksock, kcred);
                 mutex_destroy(&mux->omux_lock);
@@ -352,12 +443,20 @@
         int ret;
 
         /*
          * It'd be nice to be able to use MSG_MBLK_QUICKRELE, unfortunately,
          * that isn't actually supported by UDP at this time.
+         *
+         * Send with MSG_DONTWAIT to indicate clogged UDP sockets upstack.
          */
-        ret = ksocket_sendmblk(mux->omux_ksock, hdr, 0, &mp, kcred);
+        ret = ksocket_sendmblk(mux->omux_ksock, hdr, MSG_DONTWAIT, &mp, kcred);
+        /*
+         * NOTE: ksocket_sendmblk() may send partial packets downstack,
+         * returning what's not sent in &mp (i.e. mp pre-call might be a
+         * b_cont of mp post-call).  We can't hold up this message (it's a
+         * datagram), so we drop, and let the caller cope.
+         */
         if (ret != 0)
                 freemsg(mp);
 
         return (ret);
 }