1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/strsubr.h>
  26 #include <sys/strsun.h>
  27 #include <sys/param.h>
  28 #include <sys/sysmacros.h>
  29 #include <vm/seg_map.h>
  30 #include <vm/seg_kpm.h>
  31 #include <sys/condvar_impl.h>
  32 #include <sys/sendfile.h>
  33 #include <fs/sockfs/nl7c.h>
  34 #include <fs/sockfs/nl7curi.h>
  35 #include <fs/sockfs/socktpi_impl.h>
  36 
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip6.h>
  40 #include <inet/tcp.h>
  41 #include <inet/led.h>
  42 #include <inet/mi.h>
  43 
  44 #include <inet/nca/ncadoorhdr.h>
  45 #include <inet/nca/ncalogd.h>
  46 #include <inet/nca/ncandd.h>
  47 
  48 #include <sys/promif.h>
  49 
  50 /*
  51  * Some externs:
  52  */
  53 
  54 extern boolean_t        nl7c_logd_enabled;
  55 extern void             nl7c_logd_log(uri_desc_t *, uri_desc_t *,
  56                             time_t, ipaddr_t);
  57 extern boolean_t        nl7c_close_addr(struct sonode *);
  58 extern struct sonode    *nl7c_addr2portso(void *);
  59 extern uri_desc_t       *nl7c_http_cond(uri_desc_t *, uri_desc_t *);
  60 
  61 /*
  62  * Various global tuneables:
  63  */
  64 
/* URI time-to-live; the default of -1 means cached URIs never expire. */
clock_t         nl7c_uri_ttl = -1;      /* TTL in seconds (-1 == infinite) */

boolean_t       nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */

uint64_t        nl7c_file_prefetch = 1; /* File cache prefetch pages */

/*
 * nl7c_uri_max is lowered by uri_kmc_reclaim() each time kmem asks for
 * memory back; nl7c_uri_bytes is decremented by nl7c_uri_inactive() when
 * kmem backed response data is freed.
 */
uint64_t        nl7c_uri_max = 0;       /* Maximum bytes (0 == infinite) */
uint64_t        nl7c_uri_bytes = 0;     /* Bytes of kmem used by URIs */
  73 
  74 /*
  75  * Locals:
  76  */
  77 
  78 static int      uri_rd_response(struct sonode *, uri_desc_t *,
  79                     uri_rd_t *, boolean_t);
  80 static int      uri_response(struct sonode *, uri_desc_t *);
  81 
  82 /*
  83  * HTTP scheme functions called from nl7chttp.c:
  84  */
  85 
  86 boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
  87 boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
  88 boolean_t nl7c_http_cmp(void *, void *);
  89 mblk_t *nl7c_http_persist(struct sonode *);
  90 void nl7c_http_free(void *arg);
  91 void nl7c_http_init(void);
  92 
  93 /*
  94  * Counters that need to move to kstat and/or be removed:
  95  */
  96 
/*
 * Global (non-static) event counters, all starting at zero.
 * NOTE(review): the update sites are not in this part of the file; the
 * meaning of each counter is presumably what its name suggests (e.g.
 * hit/miss/pass for cache lookups) - confirm against the code that
 * increments it before relying on a value.
 */
volatile uint64_t nl7c_uri_request = 0;
volatile uint64_t nl7c_uri_hit = 0;
volatile uint64_t nl7c_uri_pass = 0;
volatile uint64_t nl7c_uri_miss = 0;
volatile uint64_t nl7c_uri_temp = 0;
volatile uint64_t nl7c_uri_more = 0;
volatile uint64_t nl7c_uri_data = 0;
volatile uint64_t nl7c_uri_sendfilev = 0;
volatile uint64_t nl7c_uri_reclaim_calls = 0;
volatile uint64_t nl7c_uri_reclaim_cnt = 0;
volatile uint64_t nl7c_uri_pass_urifail = 0;
volatile uint64_t nl7c_uri_pass_dupbfail = 0;
volatile uint64_t nl7c_uri_more_get = 0;
volatile uint64_t nl7c_uri_pass_method = 0;
volatile uint64_t nl7c_uri_pass_option = 0;
volatile uint64_t nl7c_uri_more_eol = 0;
volatile uint64_t nl7c_uri_more_http = 0;
volatile uint64_t nl7c_uri_pass_http = 0;
volatile uint64_t nl7c_uri_pass_addfail = 0;
volatile uint64_t nl7c_uri_pass_temp = 0;
volatile uint64_t nl7c_uri_expire = 0;
volatile uint64_t nl7c_uri_purge = 0;
volatile uint64_t nl7c_uri_NULL1 = 0;
volatile uint64_t nl7c_uri_NULL2 = 0;
volatile uint64_t nl7c_uri_close = 0;
volatile uint64_t nl7c_uri_temp_close = 0;
volatile uint64_t nl7c_uri_free = 0;
volatile uint64_t nl7c_uri_temp_free = 0;
volatile uint64_t nl7c_uri_temp_mk = 0;
volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
 127 
 128 /*
 129  * Various kmem_cache_t's:
 130  */
 131 
/*
 * All four caches are created in nl7c_uri_init(). nl7c_uri_kmc holds
 * uri_desc_t objects and is the only one created with a reclaim callback
 * (uri_kmc_reclaim()); the others hold uri_rd_t, uri_desb_t and
 * uri_segmap_t objects respectively.
 */
kmem_cache_t *nl7c_uri_kmc;
kmem_cache_t *nl7c_uri_rd_kmc;
static kmem_cache_t *uri_desb_kmc;
static kmem_cache_t *uri_segmap_kmc;
 136 
 137 static void uri_kmc_reclaim(void *);
 138 
 139 static void nl7c_uri_reclaim(void);
 140 
 141 /*
 142  * The URI hash is a dynamically sized A/B bucket hash, when the current
 143  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
 144  * the next P2Ps[] size is created.
 145  *
 * All lookups are done in the current hash then the new hash (if any),
 * if there is a new hash then when a current hash bucket chain is examined
 * any uri_desc_t members will be migrated to the new hash and when the
 * last uri_desc_t has been migrated then the new hash will become the
 * current and the previous current hash will be freed leaving a single
 * hash.
 152  *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock, the uri_hash_t
 * and all linked uri_desc_t.hash members are protected. Note, a REF_HOLD()
 * is placed on all uri_desc_t uri_hash_t list members.
 158  *
 159  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
 160  * access and WRITER for write access. Note, WRITER is only required for
 161  * hash geometry changes.
 162  *
 163  * uri_hash_which - which uri_hash_ab[] is the current hash.
 164  *
 165  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
 166  *
 167  * uri_hash_sz[] - the size for each uri_hash_ab[].
 168  *
 169  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
 170  *
 171  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
 172  * a new uri_hash_ab[] needs to be created.
 173  *
 174  * uri_hash_ab[] - the uri_hash_t entries.
 175  *
 176  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
 177  */
 178 
/*
 * One hash bucket: the head of a singly linked chain of uri_desc_t
 * (linked through uri_desc_t.hash) plus the mutex guarding that chain.
 */
typedef struct uri_hash_s {
        struct uri_desc_s       *list;          /* List of uri_t(s) */
        kmutex_t                lock;           /* Protects list and its links */
} uri_hash_t;
 183 
#define URI_HASH_AVRG   5       /* Desired average hash chain length */
#define URI_HASH_N_INIT 9       /* P2Ps[] initial index */

/*
 * A/B hash state, every array is indexed by hash (0 or 1); the other
 * index is the "new" hash, which exists only while uri_hash_ab[] for it
 * is non-NULL.
 */
static krwlock_t        uri_hash_access;        /* Guards all uri_hash_* */
static uint32_t         uri_hash_which = 0;     /* Index of current hash */
static uint32_t         uri_hash_n[2] = {URI_HASH_N_INIT, 0}; /* P2Ps[] index */
static uint32_t         uri_hash_sz[2] = {0, 0};        /* Bucket count */
static uint32_t         uri_hash_cnt[2] = {0, 0};       /* Member count */
static uint32_t         uri_hash_overflow[2] = {0, 0};  /* Grow threshold */
static uri_hash_t       *uri_hash_ab[2] = {NULL, NULL}; /* Bucket arrays */
static uri_hash_t       *uri_hash_lru[2] = {NULL, NULL}; /* Last lru bucket */
 195 
/*
 * Primes for N of 3 - 24 where P is first prime less than (2^(N-1))+(2^(N-2))
 * these primes have been found to be useful for prime sized hash tables.
 *
 * The table is indexed directly by N, so entries 0 - 2 are zero padding
 * and the trailing zero (index 25) follows the largest usable prime.
 */

static const int P2Ps[] = {
        0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
        6143, 12281, 24571, 49139, 98299, 196597, 393209,
        786431, 1572853, 3145721, 6291449, 12582893, 0};
 205 
 206 /*
 207  * Hash macros:
 208  *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return in c, *ep points to past end of (char *), on return *cp will
 *    point to the last char consumed.
 213  *
 214  *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 215  *    *cp to *ep to the unsigned hix, cp nor ep are modified.
 216  *
 217  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
 218  *    a hash index 0 - (uri_hash_sz[which] - 1).
 219  *
 220  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
 221  *    uri_desc_t members from hash from to hash to.
 222  *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking
 *    check for cur hash empty, if so make new cur. Note, as this macro
 *    can change a hash chain it needs to be run under hash_access as
 *    RW_WRITER, further as it can change the new hash to cur any access
 *    to the hash state must be done after either dropping locks and
 *    starting over or making sure the global state is consistent after
 *    as before.
 232  */
 233 
 234 #define H2A(cp, ep, c) {                                                \
 235         int     _h = 2;                                                 \
 236         int     _n = 0;                                                 \
 237         char    _hc;                                                    \
 238                                                                         \
 239         while (_h > 0 && ++(cp) < (ep)) {                         \
 240                 if (_h == 1)                                            \
 241                         _n *= 0x10;                                     \
 242                 _hc = *(cp);                                            \
 243                 if (_hc >= '0' && _hc <= '9')                             \
 244                         _n += _hc - '0';                                \
 245                 else if (_hc >= 'a' || _hc <= 'f')                        \
 246                         _n += _hc - 'W';                                \
 247                 else if (_hc >= 'A' || _hc <= 'F')                        \
 248                         _n += _hc - '7';                                \
 249                 _h--;                                                   \
 250         }                                                               \
 251         (c) = _n;                                                       \
 252 }
 253 
 254 #define URI_HASH(hv, cp, ep) {                                          \
 255         char    *_s = (cp);                                             \
 256         char    _c;                                                     \
 257                                                                         \
 258         while (_s < (ep)) {                                          \
 259                 if ((_c = *_s) == '%') {                                \
 260                         H2A(_s, (ep), _c);                              \
 261                 }                                                       \
 262                 CHASH(hv, _c);                                          \
 263                 _s++;                                                   \
 264         }                                                               \
 265 }
 266 
 267 #define URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
 268 
/*
 * URI_HASH_MIGRATE(from, hp, to) - empty the bucket hp of hash from into
 * hash to. With hp->lock held for the whole walk each member is popped
 * off the head of hp->list, the per hash member counts are adjusted,
 * and the member is pushed onto the head of its bucket in hash to (found
 * by rehashing its saved hvalue) under that bucket's own lock. The
 * member's hit count is reset to zero as part of the move.
 */
#define URI_HASH_MIGRATE(from, hp, to) {                                \
        uri_desc_t      *_nuri;                                         \
        uint32_t        _nhix;                                          \
        uri_hash_t      *_nhp;                                          \
                                                                        \
        mutex_enter(&(hp)->lock);                                        \
        while ((_nuri = (hp)->list) != NULL) {                               \
                (hp)->list = _nuri->hash;                         \
                atomic_dec_32(&uri_hash_cnt[(from)]);               \
                atomic_inc_32(&uri_hash_cnt[(to)]);                 \
                _nhix = _nuri->hvalue;                                       \
                URI_HASH_IX(_nhix, to);                                 \
                _nhp = &uri_hash_ab[(to)][_nhix];                   \
                mutex_enter(&_nhp->lock);                                \
                _nuri->hash = _nhp->list;                         \
                _nhp->list = _nuri;                                  \
                _nuri->hit = 0;                                              \
                mutex_exit(&_nhp->lock);                         \
        }                                                               \
        mutex_exit(&(hp)->lock);                                 \
}
 290 
 291 #define URI_HASH_UNLINK(cur, new, hp, puri, uri) {                      \
 292         if ((puri) != NULL) {                                           \
 293                 (puri)->hash = (uri)->hash;                               \
 294         } else {                                                        \
 295                 (hp)->list = (uri)->hash;                         \
 296         }                                                               \
 297         if (atomic_dec_32_nv(&uri_hash_cnt[(cur)]) == 0 &&          \
 298             uri_hash_ab[(new)] != NULL) {                               \
 299                 kmem_free(uri_hash_ab[cur],                             \
 300                     sizeof (uri_hash_t) * uri_hash_sz[cur]);            \
 301                 uri_hash_ab[(cur)] = NULL;                              \
 302                 uri_hash_lru[(cur)] = NULL;                             \
 303                 uri_hash_which = (new);                                 \
 304         } else {                                                        \
 305                 uri_hash_lru[(cur)] = (hp);                             \
 306         }                                                               \
 307 }
 308 
 309 void
 310 nl7c_uri_init(void)
 311 {
 312         uint32_t        cur = uri_hash_which;
 313 
 314         rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
 315 
 316         uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
 317         uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
 318         uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
 319             KM_SLEEP);
 320         uri_hash_lru[cur] = uri_hash_ab[cur];
 321 
 322         nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
 323             0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
 324 
 325         nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
 326             sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 327 
 328         uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
 329             sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 330 
 331         uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
 332             sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 333 
 334         nl7c_http_init();
 335 }
 336 
 337 #define CV_SZ   16
 338 
 339 void
 340 nl7c_mi_report_hash(mblk_t *mp)
 341 {
 342         uri_hash_t      *hp, *pend;
 343         uri_desc_t      *uri;
 344         uint32_t        cur;
 345         uint32_t        new;
 346         int             n, nz, tot;
 347         uint32_t        cv[CV_SZ + 1];
 348 
 349         rw_enter(&uri_hash_access, RW_READER);
 350         cur = uri_hash_which;
 351         new = cur ? 0 : 1;
 352 next:
 353         for (n = 0; n <= CV_SZ; n++)
 354                 cv[n] = 0;
 355         nz = 0;
 356         tot = 0;
 357         hp = &uri_hash_ab[cur][0];
 358         pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
 359         while (hp < pend) {
 360                 n = 0;
 361                 for (uri = hp->list; uri != NULL; uri = uri->hash) {
 362                         n++;
 363                 }
 364                 tot += n;
 365                 if (n > 0)
 366                         nz++;
 367                 if (n > CV_SZ)
 368                         n = CV_SZ;
 369                 cv[n]++;
 370                 hp++;
 371         }
 372 
 373         (void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
 374             "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
 375             uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
 376         (void) mi_mpprintf(mp, "Free=%d", cv[0]);
 377         for (n = 1; n < CV_SZ; n++) {
 378                 int     pn = 0;
 379                 char    pv[5];
 380                 char    *pp = pv;
 381 
 382                 for (pn = n; pn < 1000; pn *= 10)
 383                         *pp++ = ' ';
 384                 *pp = 0;
 385                 (void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
 386         }
 387         (void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
 388 
 389         if (cur != new && uri_hash_ab[new] != NULL) {
 390                 cur = new;
 391                 goto next;
 392         }
 393         rw_exit(&uri_hash_access);
 394 }
 395 
 396 void
 397 nl7c_mi_report_uri(mblk_t *mp)
 398 {
 399         uri_hash_t      *hp;
 400         uri_desc_t      *uri;
 401         uint32_t        cur;
 402         uint32_t        new;
 403         int             ix;
 404         int             ret;
 405         char            sc;
 406 
 407         rw_enter(&uri_hash_access, RW_READER);
 408         cur = uri_hash_which;
 409         new = cur ? 0 : 1;
 410 next:
 411         for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
 412                 hp = &uri_hash_ab[cur][ix];
 413                 mutex_enter(&hp->lock);
 414                 uri = hp->list;
 415                 while (uri != NULL) {
 416                         sc = *(uri->path.ep);
 417                         *(uri->path.ep) = 0;
 418                         ret = mi_mpprintf(mp, "%s: %d %d %d",
 419                             uri->path.cp, (int)uri->resplen,
 420                             (int)uri->respclen, (int)uri->count);
 421                         *(uri->path.ep) = sc;
 422                         if (ret == -1) break;
 423                         uri = uri->hash;
 424                 }
 425                 mutex_exit(&hp->lock);
 426                 if (ret == -1) break;
 427         }
 428         if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
 429                 cur = new;
 430                 goto next;
 431         }
 432         rw_exit(&uri_hash_access);
 433 }
 434 
 435 /*
 436  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
 437  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
 438  * will be freed by REF_RELE() on return.
 439  */
 440 
 441 void
 442 nl7c_uri_inactive(uri_desc_t *uri)
 443 {
 444         int64_t  bytes = 0;
 445 
 446         if (uri->tail) {
 447                 uri_rd_t *rdp = &uri->response;
 448                 uri_rd_t *free = NULL;
 449 
 450                 while (rdp) {
 451                         if (rdp->off == -1) {
 452                                 bytes += rdp->sz;
 453                                 kmem_free(rdp->data.kmem, rdp->sz);
 454                         } else {
 455                                 VN_RELE(rdp->data.vnode);
 456                         }
 457                         rdp = rdp->next;
 458                         if (free != NULL) {
 459                                 kmem_cache_free(nl7c_uri_rd_kmc, free);
 460                         }
 461                         free = rdp;
 462                 }
 463         }
 464         if (bytes) {
 465                 atomic_add_64(&nl7c_uri_bytes, -bytes);
 466         }
 467         if (uri->scheme != NULL) {
 468                 nl7c_http_free(uri->scheme);
 469         }
 470         if (uri->reqmp) {
 471                 freeb(uri->reqmp);
 472         }
 473 }
 474 
 475 /*
 476  * The reclaim is called by the kmem subsystem when kmem is running
 477  * low. More work is needed to determine the best reclaim policy, for
 478  * now we just manipulate the nl7c_uri_max global maximum bytes threshold
 479  * value using a simple arithmetic backoff of the value every time this
 480  * function is called then call uri_reclaim() to enforce it.
 481  *
 482  * Note, this value remains in place and enforced for all subsequent
 483  * URI request/response processing.
 484  *
 485  * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
 486  * the first call here set it to the current uri_bytes value then backoff
 487  * from there.
 488  *
 489  * XXX how do we determine when to increase nl7c_uri_max ???
 490  */
 491 
 492 /*ARGSUSED*/
 493 static void
 494 uri_kmc_reclaim(void *arg)
 495 {
 496         uint64_t new_max;
 497 
 498         if ((new_max = nl7c_uri_max) == 0) {
 499                 /* Currently infinite, initialize to current bytes used */
 500                 nl7c_uri_max = nl7c_uri_bytes;
 501                 new_max = nl7c_uri_bytes;
 502         }
 503         if (new_max > 1) {
 504                 /* Lower max_bytes to 93% of current value */
 505                 new_max >>= 1;                    /* 50% */
 506                 new_max += (new_max >> 1);        /* 75% */
 507                 new_max += (new_max >> 2);        /* 93% */
 508                 if (new_max < nl7c_uri_max)
 509                         nl7c_uri_max = new_max;
 510                 else
 511                         nl7c_uri_max = 1;
 512         }
 513         nl7c_uri_reclaim();
 514 }
 515 
 516 /*
 517  * Delete a uri_desc_t from the URI hash.
 518  */
 519 
 520 static void
 521 uri_delete(uri_desc_t *del)
 522 {
 523         uint32_t        hix;
 524         uri_hash_t      *hp;
 525         uri_desc_t      *uri;
 526         uri_desc_t      *puri;
 527         uint32_t        cur;
 528         uint32_t        new;
 529 
 530         ASSERT(del->hash != URI_TEMP);
 531         rw_enter(&uri_hash_access, RW_WRITER);
 532         cur = uri_hash_which;
 533         new = cur ? 0 : 1;
 534 next:
 535         puri = NULL;
 536         hix = del->hvalue;
 537         URI_HASH_IX(hix, cur);
 538         hp = &uri_hash_ab[cur][hix];
 539         for (uri = hp->list; uri != NULL; uri = uri->hash) {
 540                 if (uri != del) {
 541                         puri = uri;
 542                         continue;
 543                 }
 544                 /*
 545                  * Found the URI, unlink from the hash chain,
 546                  * drop locks, ref release it.
 547                  */
 548                 URI_HASH_UNLINK(cur, new, hp, puri, uri);
 549                 rw_exit(&uri_hash_access);
 550                 REF_RELE(uri);
 551                 return;
 552         }
 553         if (cur != new && uri_hash_ab[new] != NULL) {
 554                 /*
 555                  * Not found in current hash and have a new hash so
 556                  * check the new hash next.
 557                  */
 558                 cur = new;
 559                 goto next;
 560         }
 561         rw_exit(&uri_hash_access);
 562 }
 563 
 564 /*
 565  * Add a uri_desc_t to the URI hash.
 566  */
 567 
 568 static void
 569 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
 570 {
 571         uint32_t        hix;
 572         uri_hash_t      *hp;
 573         uint32_t        cur = uri_hash_which;
 574         uint32_t        new = cur ? 0 : 1;
 575 
 576         /*
 577          * Caller of uri_add() must hold the uri_hash_access rwlock.
 578          */
 579         ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
 580             (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
 581         /*
 582          * uri_add() always succeeds so add a hash ref to the URI now.
 583          */
 584         REF_HOLD(uri);
 585 again:
 586         hix = uri->hvalue;
 587         URI_HASH_IX(hix, cur);
 588         if (uri_hash_ab[new] == NULL &&
 589             uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
 590                 /*
 591                  * Easy case, no new hash and current hasn't overflowed,
 592                  * add URI to current hash and return.
 593                  *
 594                  * Note, the check for uri_hash_cnt[] above aren't done
 595                  * atomictally, i.e. multiple threads can be in this code
 596                  * as RW_READER and update the cnt[], this isn't a problem
 597                  * as the check is only advisory.
 598                  */
 599         fast:
 600                 atomic_inc_32(&uri_hash_cnt[cur]);
 601                 hp = &uri_hash_ab[cur][hix];
 602                 mutex_enter(&hp->lock);
 603                 uri->hash = hp->list;
 604                 hp->list = uri;
 605                 mutex_exit(&hp->lock);
 606                 rw_exit(&uri_hash_access);
 607                 return;
 608         }
 609         if (uri_hash_ab[new] == NULL) {
 610                 /*
 611                  * Need a new a or b hash, if not already RW_WRITER
 612                  * try to upgrade our lock to writer.
 613                  */
 614                 if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
 615                         /*
 616                          * Upgrade failed, we can't simple exit and reenter
 617                          * the lock as after the exit and before the reenter
 618                          * the whole world can change so just wait for writer
 619                          * then do everything again.
 620                          */
 621                         if (nonblocking) {
 622                                 /*
 623                                  * Can't block, use fast-path above.
 624                                  *
 625                                  * XXX should have a background thread to
 626                                  * handle new ab[] in this case so as to
 627                                  * not overflow the cur hash to much.
 628                                  */
 629                                 goto fast;
 630                         }
 631                         rw_exit(&uri_hash_access);
 632                         rwlock = RW_WRITER;
 633                         rw_enter(&uri_hash_access, rwlock);
 634                         cur = uri_hash_which;
 635                         new = cur ? 0 : 1;
 636                         goto again;
 637                 }
 638                 rwlock = RW_WRITER;
 639                 if (uri_hash_ab[new] == NULL) {
 640                         /*
 641                          * Still need a new hash, allocate and initialize
 642                          * the new hash.
 643                          */
 644                         uri_hash_n[new] = uri_hash_n[cur] + 1;
 645                         if (uri_hash_n[new] == 0) {
 646                                 /*
 647                                  * No larger P2Ps[] value so use current,
 648                                  * i.e. 2 of the largest are better than 1 ?
 649                                  */
 650                                 uri_hash_n[new] = uri_hash_n[cur];
 651                                 cmn_err(CE_NOTE, "NL7C: hash index overflow");
 652                         }
 653                         uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
 654                         ASSERT(uri_hash_cnt[new] == 0);
 655                         uri_hash_overflow[new] = uri_hash_sz[new] *
 656                             URI_HASH_AVRG;
 657                         uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
 658                             uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
 659                             KM_SLEEP);
 660                         if (uri_hash_ab[new] == NULL) {
 661                                 /*
 662                                  * Alloc failed, use fast-path above.
 663                                  *
 664                                  * XXX should have a background thread to
 665                                  * handle new ab[] in this case so as to
 666                                  * not overflow the cur hash to much.
 667                                  */
 668                                 goto fast;
 669                         }
 670                         uri_hash_lru[new] = uri_hash_ab[new];
 671                 }
 672         }
 673         /*
 674          * Hashed against current hash so migrate any current hash chain
 675          * members, if any.
 676          *
 677          * Note, the hash chain list can be checked for a non empty list
 678          * outside of the hash chain list lock as the hash chain struct
 679          * can't be destroyed while in the uri_hash_access rwlock, worst
 680          * case is that a non empty list is found and after acquiring the
 681          * lock another thread beats us to it (i.e. migrated the list).
 682          */
 683         hp = &uri_hash_ab[cur][hix];
 684         if (hp->list != NULL) {
 685                 URI_HASH_MIGRATE(cur, hp, new);
 686         }
 687         /*
 688          * If new hash has overflowed before current hash has been
 689          * completely migrated then walk all current hash chains and
 690          * migrate list members now.
 691          */
 692         if (atomic_inc_32_nv(&uri_hash_cnt[new]) >= uri_hash_overflow[new]) {
 693                 for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
 694                         hp = &uri_hash_ab[cur][hix];
 695                         if (hp->list != NULL) {
 696                                 URI_HASH_MIGRATE(cur, hp, new);
 697                         }
 698                 }
 699         }
 700         /*
 701          * Add URI to new hash.
 702          */
 703         hix = uri->hvalue;
 704         URI_HASH_IX(hix, new);
 705         hp = &uri_hash_ab[new][hix];
 706         mutex_enter(&hp->lock);
 707         uri->hash = hp->list;
 708         hp->list = uri;
 709         mutex_exit(&hp->lock);
 710         /*
 711          * Last, check to see if last cur hash chain has been
 712          * migrated, if so free cur hash and make new hash cur.
 713          */
 714         if (uri_hash_cnt[cur] == 0) {
 715                 /*
 716                  * If we don't already hold the uri_hash_access rwlock for
 717                  * RW_WRITE try to upgrade to RW_WRITE and if successful
 718                  * check again and to see if still need to do the free.
 719                  */
 720                 if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
 721                     uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
 722                         kmem_free(uri_hash_ab[cur],
 723                             sizeof (uri_hash_t) * uri_hash_sz[cur]);
 724                         uri_hash_ab[cur] = NULL;
 725                         uri_hash_lru[cur] = NULL;
 726                         uri_hash_which = new;
 727                 }
 728         }
 729         rw_exit(&uri_hash_access);
 730 }
 731 
 732 /*
 733  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
 734  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
 735  * add B_TRUE use the request URI to create a new hash entry. Else if add
 736  * B_FALSE ...
 737  */
 738 
 739 static uri_desc_t *
 740 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
 741 {
 742         uint32_t        hix;
 743         uri_hash_t      *hp;
 744         uri_desc_t      *uri;
 745         uri_desc_t      *puri;
 746         uint32_t        cur;
 747         uint32_t        new;
 748         char            *rcp = ruri->path.cp;
 749         char            *rep = ruri->path.ep;
 750 
 751 again:
 752         rw_enter(&uri_hash_access, RW_READER);
 753         cur = uri_hash_which;
 754         new = cur ? 0 : 1;
 755 nexthash:
 756         puri = NULL;
 757         hix = ruri->hvalue;
 758         URI_HASH_IX(hix, cur);
 759         hp = &uri_hash_ab[cur][hix];
 760         mutex_enter(&hp->lock);
 761         for (uri = hp->list; uri != NULL; uri = uri->hash) {
 762                 char    *ap = uri->path.cp;
 763                 char    *bp = rcp;
 764                 char    a, b;
 765 
 766                 /* Compare paths */
 767                 while (bp < rep && ap < uri->path.ep) {
 768                         if ((a = *ap) == '%') {
 769                                 /* Escaped hex multichar, convert it */
 770                                 H2A(ap, uri->path.ep, a);
 771                         }
 772                         if ((b = *bp) == '%') {
 773                                 /* Escaped hex multichar, convert it */
 774                                 H2A(bp, rep, b);
 775                         }
 776                         if (a != b) {
 777                                 /* Char's don't match */
 778                                 goto nexturi;
 779                         }
 780                         ap++;
 781                         bp++;
 782                 }
 783                 if (bp != rep || ap != uri->path.ep) {
 784                         /* Not same length */
 785                         goto nexturi;
 786                 }
 787                 ap = uri->auth.cp;
 788                 bp = ruri->auth.cp;
 789                 if (ap != NULL) {
 790                         if (bp == NULL) {
 791                                 /* URI has auth request URI doesn't */
 792                                 goto nexturi;
 793                         }
 794                         while (bp < ruri->auth.ep && ap < uri->auth.ep) {
 795                                 if ((a = *ap) == '%') {
 796                                         /* Escaped hex multichar, convert it */
 797                                         H2A(ap, uri->path.ep, a);
 798                                 }
 799                                 if ((b = *bp) == '%') {
 800                                         /* Escaped hex multichar, convert it */
 801                                         H2A(bp, rep, b);
 802                                 }
 803                                 if (a != b) {
 804                                         /* Char's don't match */
 805                                         goto nexturi;
 806                                 }
 807                                 ap++;
 808                                 bp++;
 809                         }
 810                         if (bp != ruri->auth.ep || ap != uri->auth.ep) {
 811                                 /* Not same length */
 812                                 goto nexturi;
 813                         }
 814                 } else if (bp != NULL) {
 815                         /* URI doesn't have auth and request URI does */
 816                         goto nexturi;
 817                 }
 818                 /*
 819                  * Have a path/auth match so before any other processing
 820                  * of requested URI, check for expire or request no cache
 821                  * purge.
 822                  */
 823                 if (uri->expire >= 0 && uri->expire <= ddi_get_lbolt() ||
 824                     ruri->nocache) {
 825                         /*
 826                          * URI has expired or request specified to not use
 827                          * the cached version, unlink the URI from the hash
 828                          * chain, release all locks, release the hash ref
 829                          * on the URI, and last look it up again.
 830                          *
 831                          * Note, this will cause all variants of the named
 832                          * URI to be purged.
 833                          */
 834                         if (puri != NULL) {
 835                                 puri->hash = uri->hash;
 836                         } else {
 837                                 hp->list = uri->hash;
 838                         }
 839                         mutex_exit(&hp->lock);
 840                         atomic_dec_32(&uri_hash_cnt[cur]);
 841                         rw_exit(&uri_hash_access);
 842                         if (ruri->nocache)
 843                                 nl7c_uri_purge++;
 844                         else
 845                                 nl7c_uri_expire++;
 846                         REF_RELE(uri);
 847                         goto again;
 848                 }
 849                 if (uri->scheme != NULL) {
 850                         /*
 851                          * URI has scheme private qualifier(s), if request
 852                          * URI doesn't or if no match skip this URI.
 853                          */
 854                         if (ruri->scheme == NULL ||
 855                             ! nl7c_http_cmp(uri->scheme, ruri->scheme))
 856                                 goto nexturi;
 857                 } else if (ruri->scheme != NULL) {
 858                         /*
 859                          * URI doesn't have scheme private qualifiers but
 860                          * request URI does, no match, skip this URI.
 861                          */
 862                         goto nexturi;
 863                 }
 864                 /*
 865                  * Have a match, ready URI for return, first put a reference
 866                  * hold on the URI, if this URI is currently being processed
 867                  * then have to wait for the processing to be completed and
 868                  * redo the lookup, else return it.
 869                  */
 870                 REF_HOLD(uri);
 871                 mutex_enter(&uri->proclock);
 872                 if (uri->proc != NULL) {
 873                         /* The URI is being processed, wait for completion */
 874                         mutex_exit(&hp->lock);
 875                         rw_exit(&uri_hash_access);
 876                         if (! nonblocking &&
 877                             cv_wait_sig(&uri->waiting, &uri->proclock)) {
 878                                 /*
 879                                  * URI has been processed but things may
 880                                  * have changed while we were away so do
 881                                  * most everything again.
 882                                  */
 883                                 mutex_exit(&uri->proclock);
 884                                 REF_RELE(uri);
 885                                 goto again;
 886                         } else {
 887                                 /*
 888                                  * A nonblocking socket or an interrupted
 889                                  * cv_wait_sig() in the first case can't
 890                                  * block waiting for the processing of the
 891                                  * uri hash hit uri to complete, in both
 892                                  * cases just return failure to lookup.
 893                                  */
 894                                 mutex_exit(&uri->proclock);
 895                                 REF_RELE(uri);
 896                                 return (NULL);
 897                         }
 898                 }
 899                 mutex_exit(&uri->proclock);
 900                 uri->hit++;
 901                 mutex_exit(&hp->lock);
 902                 rw_exit(&uri_hash_access);
 903                 return (uri);
 904         nexturi:
 905                 puri = uri;
 906         }
 907         mutex_exit(&hp->lock);
 908         if (cur != new && uri_hash_ab[new] != NULL) {
 909                 /*
 910                  * Not found in current hash and have a new hash so
 911                  * check the new hash next.
 912                  */
 913                 cur = new;
 914                 goto nexthash;
 915         }
 916 add:
 917         if (! add) {
 918                 /* Lookup only so return failure */
 919                 rw_exit(&uri_hash_access);
 920                 return (NULL);
 921         }
 922         /*
 923          * URI not hashed, finish intialization of the
 924          * request URI, add it to the hash, return it.
 925          */
 926         ruri->hit = 0;
 927         ruri->expire = -1;
 928         ruri->response.sz = 0;
 929         ruri->proc = (struct sonode *)~0;
 930         cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
 931         mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
 932         uri_add(ruri, RW_READER, nonblocking);
 933         /* uri_add() has done rw_exit(&uri_hash_access) */
 934         return (ruri);
 935 }
 936 
 937 /*
 938  * Reclaim URIs until max cache size threshold has been reached.
 939  *
 940  * A CLOCK based reclaim modified with a history (hit counter) counter.
 941  */
 942 
 943 static void
 944 nl7c_uri_reclaim(void)
 945 {
 946         uri_hash_t      *hp, *start, *pend;
 947         uri_desc_t      *uri;
 948         uri_desc_t      *puri;
 949         uint32_t        cur;
 950         uint32_t        new;
 951 
 952         nl7c_uri_reclaim_calls++;
 953 again:
 954         rw_enter(&uri_hash_access, RW_WRITER);
 955         cur = uri_hash_which;
 956         new = cur ? 0 : 1;
 957 next:
 958         hp = uri_hash_lru[cur];
 959         start = hp;
 960         pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
 961         while (nl7c_uri_bytes > nl7c_uri_max) {
 962                 puri = NULL;
 963                 for (uri = hp->list; uri != NULL; uri = uri->hash) {
 964                         if (uri->hit != 0) {
 965                                 /*
 966                                  * Decrement URI activity counter and skip.
 967                                  */
 968                                 uri->hit--;
 969                                 puri = uri;
 970                                 continue;
 971                         }
 972                         if (uri->proc != NULL) {
 973                                 /*
 974                                  * Currently being processed by a socket, skip.
 975                                  */
 976                                 continue;
 977                         }
 978                         /*
 979                          * Found a candidate, no hit(s) since added or last
 980                          * reclaim pass, unlink from it's hash chain, update
 981                          * lru scan pointer, drop lock, ref release it.
 982                          */
 983                         URI_HASH_UNLINK(cur, new, hp, puri, uri);
 984                         if (cur == uri_hash_which) {
 985                                 if (++hp == pend) {
 986                                         /* Wrap pointer */
 987                                         hp = uri_hash_ab[cur];
 988                                 }
 989                                 uri_hash_lru[cur] = hp;
 990                         }
 991                         rw_exit(&uri_hash_access);
 992                         REF_RELE(uri);
 993                         nl7c_uri_reclaim_cnt++;
 994                         goto again;
 995                 }
 996                 if (++hp == pend) {
 997                         /* Wrap pointer */
 998                         hp = uri_hash_ab[cur];
 999                 }
1000                 if (hp == start) {
1001                         if (cur != new && uri_hash_ab[new] != NULL) {
1002                                 /*
1003                                  * Done with the current hash and have a
1004                                  * new hash so check the new hash next.
1005                                  */
1006                                 cur = new;
1007                                 goto next;
1008                         }
1009                 }
1010         }
1011         rw_exit(&uri_hash_access);
1012 }
1013 
1014 /*
1015  * Called for a socket which is being freed prior to close, e.g. errored.
1016  */
1017 
1018 void
1019 nl7c_urifree(struct sonode *so)
1020 {
1021         sotpi_info_t *sti = SOTOTPI(so);
1022         uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1023 
1024         sti->sti_nl7c_uri = NULL;
1025         if (uri->hash != URI_TEMP) {
1026                 uri_delete(uri);
1027                 mutex_enter(&uri->proclock);
1028                 uri->proc = NULL;
1029                 if (CV_HAS_WAITERS(&uri->waiting)) {
1030                         cv_broadcast(&uri->waiting);
1031                 }
1032                 mutex_exit(&uri->proclock);
1033                 nl7c_uri_free++;
1034         } else {
1035                 /* No proclock as uri exclusively owned by so */
1036                 uri->proc = NULL;
1037                 nl7c_uri_temp_free++;
1038         }
1039         REF_RELE(uri);
1040 }
1041 
1042 /*
1043  * ...
1044  *
1045  *      < 0  need more data
1046  *
1047  *        0     parse complete
1048  *
1049  *      > 0  parse error
1050  */
1051 
1052 volatile uint64_t nl7c_resp_pfail = 0;
1053 volatile uint64_t nl7c_resp_ntemp = 0;
1054 volatile uint64_t nl7c_resp_pass = 0;
1055 
1056 static int
1057 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
1058 {
1059         if (! nl7c_http_response(&data, &data[sz], uri, so)) {
1060                 if (data == NULL) {
1061                         /* Parse fail */
1062                         goto pfail;
1063                 }
1064                 /* More data */
1065                 data = NULL;
1066         } else if (data == NULL) {
1067                 goto pass;
1068         }
1069         if (uri->hash != URI_TEMP && uri->nocache) {
1070                 /*
1071                  * After response parse now no cache,
1072                  * delete it from cache, wakeup any
1073                  * waiters on this URI, make URI_TEMP.
1074                  */
1075                 uri_delete(uri);
1076                 mutex_enter(&uri->proclock);
1077                 if (CV_HAS_WAITERS(&uri->waiting)) {
1078                         cv_broadcast(&uri->waiting);
1079                 }
1080                 mutex_exit(&uri->proclock);
1081                 uri->hash = URI_TEMP;
1082                 nl7c_uri_temp_mk++;
1083         }
1084         if (data == NULL) {
1085                 /* More data needed */
1086                 return (-1);
1087         }
1088         /* Success */
1089         return (0);
1090 
1091 pfail:
1092         nl7c_resp_pfail++;
1093         return (EINVAL);
1094 
1095 pass:
1096         nl7c_resp_pass++;
1097         return (ENOTSUP);
1098 }
1099 
1100 /*
1101  * Called to sink application response data, the processing of the data
1102  * is the same for a cached or temp URI (i.e. a URI for which we aren't
1103  * going to cache the URI but want to parse it for detecting response
1104  * data end such that for a persistent connection we can parse the next
1105  * request).
1106  *
1107  * On return 0 is returned for sink success, > 0 on error, and < 0 on
1108  * no so URI (note, data not sinked).
1109  */
1110 
1111 int
1112 nl7c_data(struct sonode *so, uio_t *uio)
1113 {
1114         sotpi_info_t    *sti = SOTOTPI(so);
1115         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1116         iovec_t         *iov;
1117         int             cnt;
1118         int             sz = uio->uio_resid;
1119         char            *data, *alloc;
1120         char            *bp;
1121         uri_rd_t        *rdp;
1122         boolean_t       first;
1123         int             error, perror;
1124 
1125         nl7c_uri_data++;
1126 
1127         if (uri == NULL) {
1128                 /* Socket & NL7C out of sync, disable NL7C */
1129                 sti->sti_nl7c_flags = 0;
1130                 nl7c_uri_NULL1++;
1131                 return (-1);
1132         }
1133 
1134         if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
1135                 sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1136                 first = B_TRUE;
1137         } else {
1138                 first = B_FALSE;
1139         }
1140 
1141         alloc = kmem_alloc(sz, KM_SLEEP);
1142         URI_RD_ADD(uri, rdp, sz, -1);
1143 
1144         if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
1145                 uri_delete(uri);
1146                 uri->hash = URI_TEMP;
1147         }
1148         data = alloc;
1149         alloc = NULL;
1150         rdp->data.kmem = data;
1151         atomic_add_64(&nl7c_uri_bytes, sz);
1152 
1153         bp = data;
1154         while (uio->uio_resid > 0) {
1155                 iov = uio->uio_iov;
1156                 if ((cnt = iov->iov_len) == 0) {
1157                         goto next;
1158                 }
1159                 cnt = MIN(cnt, uio->uio_resid);
1160                 error = xcopyin(iov->iov_base, bp, cnt);
1161                 if (error)
1162                         goto fail;
1163 
1164                 iov->iov_base += cnt;
1165                 iov->iov_len -= cnt;
1166                 uio->uio_resid -= cnt;
1167                 uio->uio_loffset += cnt;
1168                 bp += cnt;
1169         next:
1170                 uio->uio_iov++;
1171                 uio->uio_iovcnt--;
1172         }
1173 
1174         /* Successfull sink of data, response parse the data */
1175         perror = nl7c_resp_parse(so, uri, data, sz);
1176 
1177         /* Send the data out the connection */
1178         error = uri_rd_response(so, uri, rdp, first);
1179         if (error)
1180                 goto fail;
1181 
1182         /* Success */
1183         if (perror == 0 &&
1184             ((uri->respclen == URI_LEN_NOVALUE &&
1185             uri->resplen == URI_LEN_NOVALUE) ||
1186             uri->count >= uri->resplen)) {
1187                 /*
1188                  * No more data needed and no pending response
1189                  * data or current data count >= response length
1190                  * so close the URI processing for this so.
1191                  */
1192                 nl7c_close(so);
1193                 if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1194                         /* Not a persistent connection */
1195                         sti->sti_nl7c_flags = 0;
1196                 }
1197         }
1198 
1199         return (0);
1200 
1201 fail:
1202         if (alloc != NULL) {
1203                 kmem_free(alloc, sz);
1204         }
1205         sti->sti_nl7c_flags = 0;
1206         nl7c_urifree(so);
1207 
1208         return (error);
1209 }
1210 
1211 /*
1212  * Called to read data from file "*fp" at offset "*off" of length "*len"
1213  * for a maximum of "*max_rem" bytes.
1214  *
1215  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
1216  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
1217  * is updated with the number of bytes remaining to be read.
1218  *
1219  * Else, "NULL" is returned.
1220  */
1221 
1222 static char *
1223 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
1224 {
1225         vnode_t *vp = fp->f_vnode;
1226         int     flg = 0;
1227         size_t  size = MIN(*len, max);
1228         char    *data;
1229         int     error;
1230         uio_t   uio;
1231         iovec_t iov;
1232 
1233         (void) VOP_RWLOCK(vp, flg, NULL);
1234 
1235         if (*off > MAXOFFSET_T) {
1236                 VOP_RWUNLOCK(vp, flg, NULL);
1237                 *ret = EFBIG;
1238                 return (NULL);
1239         }
1240 
1241         if (*off + size > MAXOFFSET_T)
1242                 size = (ssize32_t)(MAXOFFSET_T - *off);
1243 
1244         data = kmem_alloc(size, KM_SLEEP);
1245 
1246         iov.iov_base = data;
1247         iov.iov_len = size;
1248         uio.uio_loffset = *off;
1249         uio.uio_iov = &iov;
1250         uio.uio_iovcnt = 1;
1251         uio.uio_resid = size;
1252         uio.uio_segflg = UIO_SYSSPACE;
1253         uio.uio_llimit = MAXOFFSET_T;
1254         uio.uio_fmode = fp->f_flag;
1255 
1256         error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
1257         VOP_RWUNLOCK(vp, flg, NULL);
1258         *ret = error;
1259         if (error) {
1260                 kmem_free(data, size);
1261                 return (NULL);
1262         }
1263         *len = size;
1264         *off += size;
1265         return (data);
1266 }
1267 
1268 /*
1269  * Called to sink application response sendfilev, as with nl7c_data() above
1270  * all the data will be processed by NL7C unless there's an error.
1271  */
1272 
1273 int
1274 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
1275     int sfvc, ssize_t *xfer)
1276 {
1277         sotpi_info_t    *sti = SOTOTPI(so);
1278         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1279         file_t          *fp = NULL;
1280         vnode_t         *vp = NULL;
1281         char            *data = NULL;
1282         u_offset_t      off;
1283         int             len;
1284         int             cnt;
1285         int             total_count = 0;
1286         char            *alloc;
1287         uri_rd_t        *rdp;
1288         int             max;
1289         int             perror;
1290         int             error = 0;
1291         boolean_t       first = B_TRUE;
1292 
1293         nl7c_uri_sendfilev++;
1294 
1295         if (uri == NULL) {
1296                 /* Socket & NL7C out of sync, disable NL7C */
1297                 sti->sti_nl7c_flags = 0;
1298                 nl7c_uri_NULL2++;
1299                 return (0);
1300         }
1301 
1302         if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
1303                 sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
1304 
1305         while (sfvc-- > 0) {
1306                 /*
1307                  * off - the current sfv read file offset or user address.
1308                  *
1309                  * len - the current sfv length in bytes.
1310                  *
1311                  * cnt - number of bytes kmem_alloc()ed.
1312                  *
1313                  * alloc - the kmem_alloc()ed buffer of size "cnt".
1314                  *
1315                  * data - copy of "alloc" used for post alloc references.
1316                  *
1317                  * fp - the current sfv file_t pointer.
1318                  *
1319                  * vp - the current "*vp" vnode_t pointer.
1320                  *
1321                  * Note, for "data" and "fp" and "vp" a NULL value is used
1322                  * when not allocated such that the common failure path "fail"
1323                  * is used.
1324                  */
1325                 off = sfvp->sfv_off;
1326                 len = sfvp->sfv_len;
1327                 cnt = len;
1328 
1329                 if (len == 0) {
1330                         sfvp++;
1331                         continue;
1332                 }
1333 
1334                 if (sfvp->sfv_fd == SFV_FD_SELF) {
1335                         /*
1336                          * User memory, copyin() all the bytes.
1337                          */
1338                         alloc = kmem_alloc(cnt, KM_SLEEP);
1339                         error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
1340                         if (error)
1341                                 goto fail;
1342                 } else {
1343                         /*
1344                          * File descriptor, prefetch some bytes.
1345                          */
1346                         if ((fp = getf(sfvp->sfv_fd)) == NULL) {
1347                                 error = EBADF;
1348                                 goto fail;
1349                         }
1350                         if ((fp->f_flag & FREAD) == 0) {
1351                                 error = EACCES;
1352                                 goto fail;
1353                         }
1354                         vp = fp->f_vnode;
1355                         if (vp->v_type != VREG) {
1356                                 error = EINVAL;
1357                                 goto fail;
1358                         }
1359                         VN_HOLD(vp);
1360 
1361                         /* Read max_rem bytes from file for prefetch */
1362                         if (nl7c_use_kmem) {
1363                                 max = cnt;
1364                         } else {
1365                                 max = MAXBSIZE * nl7c_file_prefetch;
1366                         }
1367                         alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
1368                         if (alloc == NULL)
1369                                 goto fail;
1370 
1371                         releasef(sfvp->sfv_fd);
1372                         fp = NULL;
1373                 }
1374                 URI_RD_ADD(uri, rdp, cnt, -1);
1375                 data = alloc;
1376                 alloc = NULL;
1377                 rdp->data.kmem = data;
1378                 total_count += cnt;
1379                 if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
1380                         uri_delete(uri);
1381                         uri->hash = URI_TEMP;
1382                 }
1383 
1384                 /* Response parse */
1385                 perror = nl7c_resp_parse(so, uri, data, len);
1386 
1387                 /* Send kmem data out the connection */
1388                 error = uri_rd_response(so, uri, rdp, first);
1389 
1390                 if (error)
1391                         goto fail;
1392 
1393                 if (sfvp->sfv_fd != SFV_FD_SELF) {
1394                         /*
1395                          * File descriptor, if any bytes left save vnode_t.
1396                          */
1397                         if (len > cnt) {
1398                                 /* More file data so add it */
1399                                 URI_RD_ADD(uri, rdp, len - cnt, off);
1400                                 rdp->data.vnode = vp;
1401 
1402                                 /* Send vnode data out the connection */
1403                                 error = uri_rd_response(so, uri, rdp, first);
1404                         } else {
1405                                 /* All file data fit in the prefetch */
1406                                 VN_RELE(vp);
1407                         }
1408                         *fileoff += len;
1409                         vp = NULL;
1410                 }
1411                 *xfer += len;
1412                 sfvp++;
1413 
1414                 if (first)
1415                         first = B_FALSE;
1416         }
1417         if (total_count > 0) {
1418                 atomic_add_64(&nl7c_uri_bytes, total_count);
1419         }
1420         if (perror == 0 &&
1421             ((uri->respclen == URI_LEN_NOVALUE &&
1422             uri->resplen == URI_LEN_NOVALUE) ||
1423             uri->count >= uri->resplen)) {
1424                 /*
1425                  * No more data needed and no pending response
1426                  * data or current data count >= response length
1427                  * so close the URI processing for this so.
1428                  */
1429                 nl7c_close(so);
1430                 if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
1431                         /* Not a persistent connection */
1432                         sti->sti_nl7c_flags = 0;
1433                 }
1434         }
1435 
1436         return (0);
1437 
1438 fail:
1439         if (error == EPIPE)
1440                 tsignal(curthread, SIGPIPE);
1441 
1442         if (alloc != NULL)
1443                 kmem_free(data, len);
1444 
1445         if (vp != NULL)
1446                 VN_RELE(vp);
1447 
1448         if (fp != NULL)
1449                 releasef(sfvp->sfv_fd);
1450 
1451         if (total_count > 0) {
1452                 atomic_add_64(&nl7c_uri_bytes, total_count);
1453         }
1454 
1455         sti->sti_nl7c_flags = 0;
1456         nl7c_urifree(so);
1457 
1458         return (error);
1459 }
1460 
1461 /*
1462  * Called for a socket which is closing or when an application has
1463  * completed sending all the response data (i.e. for a persistent
1464  * connection called once for each completed application response).
1465  */
1466 
1467 void
1468 nl7c_close(struct sonode *so)
1469 {
1470         sotpi_info_t    *sti = SOTOTPI(so);
1471         uri_desc_t      *uri = (uri_desc_t *)sti->sti_nl7c_uri;
1472 
1473         if (uri == NULL) {
1474                 /*
1475                  * No URI being processed so might be a listen()er
1476                  * if so do any cleanup, else nothing more to do.
1477                  */
1478                 if (so->so_state & SS_ACCEPTCONN) {
1479                         (void) nl7c_close_addr(so);
1480                 }
1481                 return;
1482         }
1483         sti->sti_nl7c_uri = NULL;
1484         if (uri->hash != URI_TEMP) {
1485                 mutex_enter(&uri->proclock);
1486                 uri->proc = NULL;
1487                 if (CV_HAS_WAITERS(&uri->waiting)) {
1488                         cv_broadcast(&uri->waiting);
1489                 }
1490                 mutex_exit(&uri->proclock);
1491                 nl7c_uri_close++;
1492         } else {
1493                 /* No proclock as uri exclusively owned by so */
1494                 uri->proc = NULL;
1495                 nl7c_uri_temp_close++;
1496         }
1497         REF_RELE(uri);
1498         if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
1499                 nl7c_uri_reclaim();
1500         }
1501 }
1502 
1503 /*
1504  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
1505  * release the segmap mapping. Note, the uri_segmap_t will be freed by
1506  * REF_RELE() on return.
1507  */
1508 
1509 void
1510 uri_segmap_inactive(uri_segmap_t *smp)
1511 {
1512         if (!segmap_kpm) {
1513                 (void) segmap_fault(kas.a_hat, segkmap, smp->base,
1514                     smp->len, F_SOFTUNLOCK, S_OTHER);
1515         }
1516         (void) segmap_release(segkmap, smp->base, SM_DONTNEED);
1517         VN_RELE(smp->vp);
1518 }
1519 
1520 /*
1521  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
1522  * release the reference, one per desballoc() of a segmap page, if a rd_t
1523  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
1524  * last kmem free the uri_desb_t.
1525  */
1526 
1527 static void
1528 uri_desb_free(uri_desb_t *desb)
1529 {
1530         if (desb->segmap != NULL) {
1531                 REF_RELE(desb->segmap);
1532         }
1533         REF_RELE(desb->uri);
1534         kmem_cache_free(uri_desb_kmc, desb);
1535 }
1536 
1537 /*
1538  * Segmap map up to a page of a uri_rd_t file descriptor.
1539  */
1540 
1541 uri_segmap_t *
1542 uri_segmap_map(uri_rd_t *rdp, int bytes)
1543 {
1544         uri_segmap_t    *segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
1545         int             len = MIN(rdp->sz, MAXBSIZE);
1546 
1547         if (len > bytes)
1548                 len = bytes;
1549 
1550         REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
1551         segmap->len = len;
1552         VN_HOLD(rdp->data.vnode);
1553         segmap->vp = rdp->data.vnode;
1554 
1555         segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
1556             segmap_kpm ? SM_FAULT : 0, S_READ);
1557 
1558         if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
1559             F_SOFTLOCK, S_READ) != 0) {
1560                 REF_RELE(segmap);
1561                 return (NULL);
1562         }
1563         return (segmap);
1564 }
1565 
1566 /*
1567  * Chop up the kernel virtual memory area *data of size *sz bytes for
1568  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
1569  * the given template uri_desb_t *temp of max_mblk bytes per.
1570  *
1571  * The values of *data, *sz, and *bytes are updated on return, the
1572  * mblk_t chain is returned.
1573  */
1574 
1575 static mblk_t *
1576 uri_desb_chop(char **data, size_t *sz, int *bytes, uri_desb_t *temp,
1577     int max_mblk, char *eoh, mblk_t *persist)
1578 {
1579         char            *ldata = *data;
1580         size_t          lsz = *sz;
1581         int             lbytes = bytes ? *bytes : lsz;
1582         uri_desb_t      *desb;
1583         mblk_t          *mp = NULL;
1584         mblk_t          *nmp, *pmp = NULL;
1585         int             msz;
1586 
1587         if (lbytes == 0 && lsz == 0)
1588                 return (NULL);
1589 
1590         while (lbytes > 0 && lsz > 0) {
1591                 msz = MIN(lbytes, max_mblk);
1592                 msz = MIN(msz, lsz);
1593                 if (persist && eoh >= ldata && eoh < &ldata[msz]) {
1594                         msz = (eoh - ldata);
1595                         pmp = persist;
1596                         persist = NULL;
1597                         if (msz == 0) {
1598                                 nmp = pmp;
1599                                 pmp = NULL;
1600                                 goto zero;
1601                         }
1602                 }
1603                 desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
1604                 REF_HOLD(temp->uri);
1605                 if (temp->segmap) {
1606                         REF_HOLD(temp->segmap);
1607                 }
1608                 bcopy(temp, desb, sizeof (*desb));
1609                 desb->frtn.free_arg = (caddr_t)desb;
1610                 nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
1611                 if (nmp == NULL) {
1612                         if (temp->segmap) {
1613                                 REF_RELE(temp->segmap);
1614                         }
1615                         REF_RELE(temp->uri);
1616                         if (mp != NULL) {
1617                                 mp->b_next = NULL;
1618                                 freemsg(mp);
1619                         }
1620                         if (persist != NULL) {
1621                                 freeb(persist);
1622                         }
1623                         return (NULL);
1624                 }
1625                 nmp->b_wptr += msz;
1626         zero:
1627                 if (mp != NULL) {
1628                         mp->b_next->b_cont = nmp;
1629                 } else {
1630                         mp = nmp;
1631                 }
1632                 if (pmp != NULL) {
1633                         nmp->b_cont = pmp;
1634                         nmp = pmp;
1635                         pmp = NULL;
1636                 }
1637                 mp->b_next = nmp;
1638                 ldata += msz;
1639                 lsz -= msz;
1640                 lbytes -= msz;
1641         }
1642         *data = ldata;
1643         *sz = lsz;
1644         if (bytes)
1645                 *bytes = lbytes;
1646         return (mp);
1647 }
1648 
1649 /*
1650  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
1651  * the entire mblk_t chain down without flow-control checks.
1652  */
1653 
1654 static int
1655 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
1656 {
1657         struct stdata *stp;
1658         int error = 0;
1659 
1660         ASSERT(vp->v_stream);
1661         stp = vp->v_stream;
1662 
1663         /* Fast check of flags before acquiring the lock */
1664         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
1665                 mutex_enter(&stp->sd_lock);
1666                 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
1667                 mutex_exit(&stp->sd_lock);
1668                 if (error != 0) {
1669                         if (!(stp->sd_flag & STPLEX) &&
1670                             (stp->sd_wput_opt & SW_SIGPIPE)) {
1671                                 error = EPIPE;
1672                         }
1673                         return (error);
1674                 }
1675         }
1676         putnext(stp->sd_wrq, mp);
1677         return (0);
1678 }
1679 
1680 /*
1681  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
1682  */
1683 
1684 static int
1685 uri_rd_response(struct sonode *so,
1686     uri_desc_t *uri,
1687     uri_rd_t *rdp,
1688     boolean_t first)
1689 {
1690         vnode_t         *vp = SOTOV(so);
1691         int             max_mblk = (int)vp->v_stream->sd_maxblk;
1692         int             wsz;
1693         mblk_t          *mp, *wmp, *persist;
1694         int             write_bytes;
1695         uri_rd_t        rd;
1696         uri_desb_t      desb;
1697         uri_segmap_t    *segmap = NULL;
1698         char            *segmap_data;
1699         size_t          segmap_sz;
1700         int             error;
1701         int             fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
1702             ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
1703 
1704 
1705         /* Initialize template uri_desb_t */
1706         desb.frtn.free_func = uri_desb_free;
1707         desb.frtn.free_arg = NULL;
1708         desb.uri = uri;
1709 
1710         /* Get a local copy of the rd_t */
1711         bcopy(rdp, &rd, sizeof (rd));
1712         do {
1713                 if (first) {
1714                         /*
1715                          * For first kstrwrite() enough data to get
1716                          * things going, note non blocking version of
1717                          * kstrwrite() will be used below.
1718                          */
1719                         write_bytes = P2ROUNDUP((max_mblk * 4),
1720                             MAXBSIZE * nl7c_file_prefetch);
1721                 } else {
1722                         if ((write_bytes = so->so_sndbuf) == 0)
1723                                 write_bytes = vp->v_stream->sd_qn_maxpsz;
1724                         ASSERT(write_bytes > 0);
1725                         write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
1726                 }
1727                 /*
1728                  * Chop up to a write_bytes worth of data.
1729                  */
1730                 wmp = NULL;
1731                 wsz = write_bytes;
1732                 do {
1733                         if (rd.sz == 0)
1734                                 break;
1735                         if (rd.off == -1) {
1736                                 if (uri->eoh >= rd.data.kmem &&
1737                                     uri->eoh < &rd.data.kmem[rd.sz]) {
1738                                         persist = nl7c_http_persist(so);
1739                                 } else {
1740                                         persist = NULL;
1741                                 }
1742                                 desb.segmap = NULL;
1743                                 mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
1744                                     &wsz, &desb, max_mblk, uri->eoh, persist);
1745                                 if (mp == NULL) {
1746                                         error = ENOMEM;
1747                                         goto invalidate;
1748                                 }
1749                         } else {
1750                                 if (segmap == NULL) {
1751                                         segmap = uri_segmap_map(&rd,
1752                                             write_bytes);
1753                                         if (segmap == NULL) {
1754                                                 error = ENOMEM;
1755                                                 goto invalidate;
1756                                         }
1757                                         desb.segmap = segmap;
1758                                         segmap_data = segmap->base;
1759                                         segmap_sz = segmap->len;
1760                                 }
1761                                 mp = uri_desb_chop(&segmap_data, &segmap_sz,
1762                                     &wsz, &desb, max_mblk, NULL, NULL);
1763                                 if (mp == NULL) {
1764                                         error = ENOMEM;
1765                                         goto invalidate;
1766                                 }
1767                                 if (segmap_sz == 0) {
1768                                         rd.sz -= segmap->len;
1769                                         rd.off += segmap->len;
1770                                         REF_RELE(segmap);
1771                                         segmap = NULL;
1772                                 }
1773                         }
1774                         if (wmp == NULL) {
1775                                 wmp = mp;
1776                         } else {
1777                                 wmp->b_next->b_cont = mp;
1778                                 wmp->b_next = mp->b_next;
1779                                 mp->b_next = NULL;
1780                         }
1781                 } while (wsz > 0 && rd.sz > 0);
1782 
1783                 wmp->b_next = NULL;
1784                 if (first) {
1785                         /* First kstrwrite(), use noqwait */
1786                         if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
1787                                 goto invalidate;
1788                         /*
1789                          * For the rest of the kstrwrite()s use SO_SNDBUF
1790                          * worth of data at a time, note these kstrwrite()s
1791                          * may (will) block one or more times.
1792                          */
1793                         first = B_FALSE;
1794                 } else {
1795                         if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
1796                                 if (error == EAGAIN) {
1797                                         nl7c_uri_rd_EAGAIN++;
1798                                         if ((error =
1799                                             kstrwritempnoqwait(vp, wmp)) != 0)
1800                                                 goto invalidate;
1801                                 } else
1802                                         goto invalidate;
1803                         }
1804                 }
1805         } while (rd.sz > 0);
1806 
1807         return (0);
1808 
1809 invalidate:
1810         if (segmap) {
1811                 REF_RELE(segmap);
1812         }
1813         if (wmp)
1814                 freemsg(wmp);
1815 
1816         return (error);
1817 }
1818 
1819 /*
1820  * Send the URI uri_desc_t *uri response out the socket_t *so.
1821  */
1822 
1823 static int
1824 uri_response(struct sonode *so, uri_desc_t *uri)
1825 {
1826         uri_rd_t        *rdp = &uri->response;
1827         boolean_t       first = B_TRUE;
1828         int             error;
1829 
1830         while (rdp != NULL) {
1831                 error = uri_rd_response(so, uri, rdp, first);
1832                 if (error != 0) {
1833                         goto invalidate;
1834                 }
1835                 first = B_FALSE;
1836                 rdp = rdp->next;
1837         }
1838         return (0);
1839 
1840 invalidate:
1841         if (uri->hash != URI_TEMP)
1842                 uri_delete(uri);
1843         return (error);
1844 }
1845 
1846 /*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
1849  *
1850  *    pchar       = unreserved | escaped |
1851  *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
1852  *
1853  *    unreserved  = alphanum | mark
1854  *
1855  *    alphanum    = alpha | digit
1856  *
1857  *    alpha       = lowalpha | upalpha
1858  *
1859  *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
1860  *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
1861  *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
1862  *                  "y" | "z"
1863  *
1864  *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
1865  *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
1866  *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
1867  *                  "Y" | "Z"
1868  *
1869  *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
1870  *                  "8" | "9"
1871  *
1872  *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
1873  *
1874  *    escaped     = "%" hex hex
1875  *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
1876  *                  "a" | "b" | "c" | "d" | "e" | "f"
1877  */
1878 
/*
 * One entry per 7-bit character code, non-zero if the character is
 * accepted, sixteen codes per row.
 *
 * NOTE(review): '(' and ')' (0x28, 0x29) are 0 here although the grammar
 * comment lists them as mark characters, and '/' (0x2F) is 1 although it
 * is not in that grammar - confirm this is intended.
 */
static char pchars[] = {
    /* 0x00 - 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F */
    0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
    /* 0x30 - 0x3F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
    /* 0x40 - 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
};

/* Mask a character code down to a valid pchars[] index */
#define PCHARS_MASK 0x7F
1899 
1900 /*
1901  * This is the main L7 request message parse, we are called each time
1902  * new data is availble for a socket, each time a single buffer of the
1903  * entire message to date is given.
1904  *
1905  * Here we parse the request looking for the URI, parse it, and if a
1906  * supported scheme call the scheme parser to commplete the parse of any
1907  * headers which may further qualify the identity of the requested object
1908  * then lookup it up in the URI hash.
1909  *
1910  * Return B_TRUE for more processing.
1911  *
1912  * Note, at this time the parser supports the generic message format as
1913  * specified in RFC 822 with potentional limitations as specified in RFC
1914  * 2616 for HTTP messages.
1915  *
1916  * Note, the caller supports an mblk_t chain, for now the parser(s)
1917  * require the complete header in a single mblk_t. This is the common
1918  * case and certainly for high performance environments, if at a future
1919  * date mblk_t chains are important the parse can be reved to process
1920  * mblk_t chains.
1921  */
1922 
1923 boolean_t
1924 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
1925 {
1926         sotpi_info_t *sti = SOTOTPI(so);
1927         char    *cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
1928         char    *ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
1929         char    *get = "GET ";
1930         char    *post = "POST ";
1931         char    c;
1932         char    *uris;
1933         uri_desc_t *uri = NULL;
1934         uri_desc_t *ruri = NULL;
1935         mblk_t  *reqmp;
1936         uint32_t hv = 0;
1937 
1938         if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
1939                 nl7c_uri_pass_dupbfail++;
1940                 goto pass;
1941         }
1942         /*
1943          * Allocate and initialize minimumal state for the request
1944          * uri_desc_t, in the cache hit case this uri_desc_t will
1945          * be freed.
1946          */
1947         uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
1948         REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
1949         uri->hash = NULL;
1950         uri->tail = NULL;
1951         uri->scheme = NULL;
1952         uri->count = 0;
1953         uri->reqmp = reqmp;
1954 
1955         /*
1956          * Set request time to current time.
1957          */
1958         sti->sti_nl7c_rtime = gethrestime_sec();
1959 
1960         /*
1961          * Parse the Request-Line for the URI.
1962          *
1963          * For backwards HTTP version compatable reasons skip any leading
1964          * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
1965          */
1966         while (cp < ep && (*cp == '\r' || *cp == '\n')) {
1967                 cp++;
1968         }
1969         while (cp < ep && *get == *cp) {
1970                 get++;
1971                 cp++;
1972         }
1973         if (*get != 0) {
1974                 /* Note a "GET", check for "POST" */
1975                 while (cp < ep && *post == *cp) {
1976                         post++;
1977                         cp++;
1978                 }
1979                 if (*post != 0) {
1980                         if (cp == ep) {
1981                                 nl7c_uri_more_get++;
1982                                 goto more;
1983                         }
1984                         /* Not a "GET" or a "POST", just pass */
1985                         nl7c_uri_pass_method++;
1986                         goto pass;
1987                 }
1988                 /* "POST", don't cache but still may want to parse */
1989                 uri->hash = URI_TEMP;
1990         }
1991         /*
1992          * Skip over URI path char(s) and save start and past end pointers.
1993          */
1994         uris = cp;
1995         while (cp < ep && (c = *cp) != ' ' && c != '\r') {
1996                 if (c == '?') {
1997                         /* Don't cache but still may want to parse */
1998                         uri->hash = URI_TEMP;
1999                 }
2000                 CHASH(hv, c);
2001                 cp++;
2002         }
2003         if (c != '\r' && cp == ep) {
2004                 nl7c_uri_more_eol++;
2005                 goto more;
2006         }
2007         /*
2008          * Request-Line URI parsed, pass the rest of the request on
2009          * to the the http scheme parse.
2010          */
2011         uri->path.cp = uris;
2012         uri->path.ep = cp;
2013         uri->hvalue = hv;
2014         if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
2015                 /*
2016                  * Parse not successful or pass on request, the pointer
2017                  * to the parse pointer "cp" is overloaded such that ! NULL
2018                  * for more data and NULL for bad parse of request or pass.
2019                  */
2020                 if (cp != NULL) {
2021                         nl7c_uri_more_http++;
2022                         goto more;
2023                 }
2024                 nl7c_uri_pass_http++;
2025                 goto pass;
2026         }
2027         if (uri->nocache) {
2028                 uri->hash = URI_TEMP;
2029                 (void) uri_lookup(uri, B_FALSE, nonblocking);
2030         } else if (uri->hash == URI_TEMP) {
2031                 uri->nocache = B_TRUE;
2032                 (void) uri_lookup(uri, B_FALSE, nonblocking);
2033         }
2034 
2035         if (uri->hash == URI_TEMP) {
2036                 if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
2037                         /* Temporary URI so skip hash processing */
2038                         nl7c_uri_request++;
2039                         nl7c_uri_temp++;
2040                         goto temp;
2041                 }
2042                 /* Not persistent so not interested in the response */
2043                 nl7c_uri_pass_temp++;
2044                 goto pass;
2045         }
2046         /*
2047          * Check the URI hash for a cached response, save the request
2048          * uri in case we need it below.
2049          */
2050         ruri = uri;
2051         if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
2052                 /*
2053                  * Failed to lookup due to nonblocking wait required,
2054                  * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
2055                  * failure, ... Just pass on this request.
2056                  */
2057                 nl7c_uri_pass_addfail++;
2058                 goto pass;
2059         }
2060         nl7c_uri_request++;
2061         if (uri->response.sz > 0) {
2062                 /*
2063                  * We have the response cached, update recv mblk rptr
2064                  * to reflect the data consumed in parse.
2065                  */
2066                 mblk_t  *mp = sti->sti_nl7c_rcv_mp;
2067 
2068                 if (cp == (char *)mp->b_wptr) {
2069                         sti->sti_nl7c_rcv_mp = mp->b_cont;
2070                         mp->b_cont = NULL;
2071                         freeb(mp);
2072                 } else {
2073                         mp->b_rptr = (unsigned char *)cp;
2074                 }
2075                 nl7c_uri_hit++;
2076                 /* If logging enabled log request */
2077                 if (nl7c_logd_enabled) {
2078                         ipaddr_t faddr;
2079 
2080                         if (so->so_family == AF_INET) {
2081                                 /* Only support IPv4 addrs */
2082                                 faddr = ((struct sockaddr_in *)
2083                                     sti->sti_faddr_sa) ->sin_addr.s_addr;
2084                         } else {
2085                                 faddr = 0;
2086                         }
2087                         /* XXX need to pass response type, e.g. 200, 304 */
2088                         nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
2089                 }
2090 
2091                 /* If conditional request check for substitute response */
2092                 if (ruri->conditional) {
2093                         uri = nl7c_http_cond(ruri, uri);
2094                 }
2095 
2096                 /*
2097                  * Release reference on request URI, send the response out
2098                  * the socket, release reference on response uri, set the
2099                  * *ret value to B_TRUE to indicate request was consumed
2100                  * then return B_FALSE to indcate no more data needed.
2101                  */
2102                 REF_RELE(ruri);
2103                 (void) uri_response(so, uri);
2104                 REF_RELE(uri);
2105                 *ret = B_TRUE;
2106                 return (B_FALSE);
2107         }
2108         /*
2109          * Miss the cache, the request URI is in the cache waiting for
2110          * application write-side data to fill it.
2111          */
2112         nl7c_uri_miss++;
2113 temp:
2114         /*
2115          * A miss or temp URI for which response data is needed, link
2116          * uri to so and so to uri, set WAITWRITE in the so such that
2117          * read-side processing is suspended (so the next read() gets
2118          * the request data) until a write() is processed by NL7C.
2119          *
2120          * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
2121          */
2122         uri->proc = so;
2123         sti->sti_nl7c_uri = uri;
2124         sti->sti_nl7c_flags |= NL7C_WAITWRITE;
2125         *ret = B_FALSE;
2126         return (B_FALSE);
2127 
2128 more:
2129         /* More data is needed, note fragmented recv not supported */
2130         nl7c_uri_more++;
2131 
2132 pass:
2133         /* Pass on this request */
2134         nl7c_uri_pass++;
2135         nl7c_uri_request++;
2136         if (ruri != NULL) {
2137                 REF_RELE(ruri);
2138         }
2139         if (uri) {
2140                 REF_RELE(uri);
2141         }
2142         sti->sti_nl7c_flags = 0;
2143         *ret = B_FALSE;
2144         return (B_FALSE);
2145 }