Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>


  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  *
  36  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
  37  * static char rcsid[] =
  38  * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
  39  */
  40 /*
  41  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  42  * Use is subject to license terms.

  43  */
  44 
  45 /*
  46  * The BPF implements the following access controls for zones attempting
  47  * to read and write data. Writing of data requires that the net_rawaccess
  48  * privilege is held whilst reading data requires either net_rawaccess or
  49  * net_observability.
  50  *
  51  *                              | Shared |  Exclusive |   Global
  52  * -----------------------------+--------+------------+------------+
  53  * DLT_IPNET in local zone      |  Read  |    Read    |    Read    |
  54  * -----------------------------+--------+------------+------------+
  55  * Raw access to local zone NIC |  None  | Read/Write | Read/Write |
  56  * -----------------------------+--------+------------+------------+
  57  * Raw access to all NICs       |  None  |    None    | Read/Write |
  58  * -----------------------------+--------+------------+------------+
  59  *
  60  * The BPF driver is written as a cloning driver: each call to bpfopen()
  61  * allocates a new minor number. This provides BPF with a 1:1 relationship
  62  * between open's and close's. There is some amount of "descriptor state"


1379 
1380 /*
1381  * Copy the interface name to the ifreq.
1382  */
1383 static int
1384 bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
1385 {
1386 
1387         mutex_enter(&d->bd_lock);
1388         if (d->bd_bif == NULL) {
1389                 mutex_exit(&d->bd_lock);
1390                 return (EINVAL);
1391         }
1392 
1393         (void) strlcpy(buffer, d->bd_ifname, bufsize);
1394         mutex_exit(&d->bd_lock);
1395 
1396         return (0);
1397 }
1398 
1399 /*
1400  * Support for poll() system call
1401  *
1402  * Return true iff the specific operation will not block indefinitely - with
1403  * the assumption that it is safe to positively acknowledge a request for the
1404  * ability to write to the BPF device.
1405  * Otherwise, return false but make a note that a selnotify() must be done.
1406  */
1407 int
1408 bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
1409     struct pollhead **phpp)
1410 {
1411         struct bpf_d *d = bpf_dev_get(getminor(dev));
1412 








1413         if (events & (POLLIN | POLLRDNORM)) {
1414                 /*
1415                  * An imitation of the FIONREAD ioctl code.
1416                  */
1417                 mutex_enter(&d->bd_lock);
1418                 if (d->bd_hlen != 0 ||
1419                     ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1420                     d->bd_slen != 0)) {
1421                         *reventsp |= events & (POLLIN | POLLRDNORM);
1422                 } else {






1423                         *reventsp = 0;
1424                         if (!anyyet)
1425                                 *phpp = &d->bd_poll;
1426                         /* Start the read timeout if necessary */
1427                         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1428                                 bpf_clear_timeout(d);
1429                                 /*
1430                                  * Only allow the timeout to be set once.
1431                                  */
1432                                 if (d->bd_callout == 0)
1433                                         d->bd_callout = timeout(bpf_timed_out,
1434                                             d, d->bd_rtout);
1435                                 d->bd_state = BPF_WAITING;
1436                         }
1437                 }
1438                 mutex_exit(&d->bd_lock);
1439         }
1440 
1441         return (0);
1442 }
1443 
1444 /*
1445  * Copy data from an mblk_t chain into a buffer. This works for ipnet




  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  *
  36  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
  37  * static char rcsid[] =
  38  * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
  39  */
  40 /*
  41  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  42  * Use is subject to license terms.
  43  * Copyright 2017 Joyent, Inc.
  44  */
  45 
  46 /*
  47  * The BPF implements the following access controls for zones attempting
  48  * to read and write data. Writing of data requires that the net_rawaccess
  49  * privilege is held whilst reading data requires either net_rawaccess or
  50  * net_observability.
  51  *
  52  *                              | Shared |  Exclusive |   Global
  53  * -----------------------------+--------+------------+------------+
  54  * DLT_IPNET in local zone      |  Read  |    Read    |    Read    |
  55  * -----------------------------+--------+------------+------------+
  56  * Raw access to local zone NIC |  None  | Read/Write | Read/Write |
  57  * -----------------------------+--------+------------+------------+
  58  * Raw access to all NICs       |  None  |    None    | Read/Write |
  59  * -----------------------------+--------+------------+------------+
  60  *
  61  * The BPF driver is written as a cloning driver: each call to bpfopen()
  62  * allocates a new minor number. This provides BPF with a 1:1 relationship
  63  * between open's and close's. There is some amount of "descriptor state"


1380 
1381 /*
1382  * Copy the interface name to the ifreq.
1383  */
1384 static int
1385 bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
1386 {
1387 
1388         mutex_enter(&d->bd_lock);
1389         if (d->bd_bif == NULL) {
1390                 mutex_exit(&d->bd_lock);
1391                 return (EINVAL);
1392         }
1393 
1394         (void) strlcpy(buffer, d->bd_ifname, bufsize);
1395         mutex_exit(&d->bd_lock);
1396 
1397         return (0);
1398 }
1399 
1400 /* ARGSUSED */







1401 int
1402 bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
1403     struct pollhead **phpp)
1404 {
1405         struct bpf_d *d = bpf_dev_get(getminor(dev));
1406 
1407         /*
1408          * Until this driver is modified to issue proper pollwakeup() calls on
1409          * its pollhead, edge-triggered polling is not allowed.
1410          */
1411         if (events & POLLET) {
1412                 return (EPERM);
1413         }
1414 
1415         if (events & (POLLIN | POLLRDNORM)) {
1416                 /*
1417                  * An imitation of the FIONREAD ioctl code.
1418                  */
1419                 mutex_enter(&d->bd_lock);
1420                 if (d->bd_hlen != 0 ||
1421                     ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1422                     d->bd_slen != 0)) {
1423                         *reventsp |= events & (POLLIN | POLLRDNORM);
1424                 } else {
1425                         /*
1426                          * Until the bpf driver has been updated to include
1427                          * adequate pollwakeup() logic, no pollhead will be
1428                          * emitted here, preventing the resource from being
1429                          * cached by poll()/devpoll/epoll.
1430                          */
1431                         *reventsp = 0;


1432                         /* Start the read timeout if necessary */
1433                         if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1434                                 bpf_clear_timeout(d);
1435                                 /*
1436                                  * Only allow the timeout to be set once.
1437                                  */
1438                                 if (d->bd_callout == 0)
1439                                         d->bd_callout = timeout(bpf_timed_out,
1440                                             d, d->bd_rtout);
1441                                 d->bd_state = BPF_WAITING;
1442                         }
1443                 }
1444                 mutex_exit(&d->bd_lock);
1445         }
1446 
1447         return (0);
1448 }
1449 
1450 /*
1451  * Copy data from an mblk_t chain into a buffer. This works for ipnet