Print this page
NEX-19712 SMB directory listings sometimes wrong after NEX-19025
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
NEX-19025 CIFS gets confused with filenames containing enhanced Unicode
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
and: (fix build, check-rtime)
NEX-4458 Incorrect directory listing response for non-UNICODE clients
Reviewed by: Matt Barden <Matt.Barden@nexenta.com>
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
NEX-2460 libfksmbd should not link with libsmb
SMB-50 User-mode SMB server
 Includes work by these authors:
 Thomas Keiser <thomas.keiser@nexenta.com>
 Albert Lee <trisk@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/common/smbsrv/smb_utf8.c
          +++ new/usr/src/common/smbsrv/smb_utf8.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   *
  25      - * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
       25 + * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  /*
  29      - * Multibyte/wide-char conversion routines. Wide-char encoding provides
  30      - * a fixed size character encoding that maps to the Unicode 16-bit
  31      - * (UCS-2) character set standard. Multibyte or UCS transformation
  32      - * format (UTF) encoding is a variable length character encoding scheme
  33      - * that s compatible with existing ASCII characters and guarantees that
  34      - * the resultant strings do not contain embedded null characters. Both
  35      - * types of encoding provide a null terminator: single byte for UTF-8
  36      - * and a wide-char null for Unicode. See RFC 2044.
  37      - *
  38      - * The table below illustrates the UTF-8 encoding scheme. The letter x
  39      - * indicates bits available for encoding the character value.
  40      - *
  41      - *      UCS-2                   UTF-8 octet sequence (binary)
  42      - *      0x0000-0x007F   0xxxxxxx
  43      - *      0x0080-0x07FF   110xxxxx 10xxxxxx
  44      - *      0x0800-0xFFFF   1110xxxx 10xxxxxx 10xxxxxx
  45      - *
  46      - * RFC 2044
  47      - * UTF-8,a transformation format of UNICODE and ISO 10646
  48      - * F. Yergeau
  49      - * Alis Technologies
  50      - * October 1996
       29 + * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire
       30 + * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs).
  51   31   */
  52   32  
  53   33  #if defined(_KERNEL) || defined(_FAKE_KERNEL)
  54   34  #include <sys/types.h>
  55   35  #include <sys/sunddi.h>
  56      -#else
       36 +#else   /* _KERNEL || _FAKE_KERNEL */
  57   37  #include <stdio.h>
  58   38  #include <stdlib.h>
  59      -#include <assert.h>
  60   39  #include <strings.h>
  61      -#endif
       40 +#include <iconv.h>
       41 +#include <assert.h>
       42 +#endif  /* _KERNEL || _FAKE_KERNEL */
       43 +#include <sys/u8_textprep.h>
  62   44  #include <smbsrv/string.h>
  63   45  
  64   46  
  65   47  /*
  66   48   * mbstowcs
  67   49   *
  68   50   * The mbstowcs() function converts a multibyte character string
  69   51   * mbstring into a wide character string wcstring. No more than
  70   52   * nwchars wide characters are stored. A terminating null wide
  71   53   * character is appended if there is room.
  72   54   *
  73   55   * Returns the number of wide characters converted, not counting
  74   56   * any terminating null wide character. Returns -1 if an invalid
  75   57   * multibyte character is encountered.
  76   58   */
  77   59  size_t
  78      -smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
       60 +smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars)
  79   61  {
  80      -        int len;
  81      -        smb_wchar_t     *start = wcstring;
       62 +        size_t mbslen, wcslen;
       63 +        int err;
  82   64  
  83      -        while (nwchars--) {
  84      -                len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX);
  85      -                if (len < 0) {
  86      -                        *wcstring = 0;
  87      -                        return ((size_t)-1);
  88      -                }
       65 +        /* NULL or empty input is allowed. */
       66 +        if (mbs == NULL || *mbs == '\0') {
       67 +                if (wcs != NULL && nwchars > 0)
       68 +                        *wcs = 0;
       69 +                return (0);
       70 +        }
  89   71  
  90      -                if (*mbstring == 0)
  91      -                        break;
  92      -
  93      -                ++wcstring;
  94      -                mbstring += len;
       72 +        /*
       73 +         * Traditional mbstowcs(3C) allows wcs==NULL to get the length.
        74 +         * SMB never calls it that way, so just fail instead of crashing.
       75 +         */
       76 +        if (wcs == NULL) {
       77 +                return ((size_t)-1);
  95   78          }
  96   79  
  97      -        return (wcstring - start);
       80 +        mbslen = strlen(mbs);
       81 +        wcslen = nwchars;
       82 +        err = uconv_u8tou16((const uchar_t *)mbs, &mbslen,
       83 +            wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN);
       84 +        if (err != 0)
       85 +                return ((size_t)-1);
       86 +
       87 +        if (wcslen < nwchars)
       88 +                wcs[wcslen] = 0;
       89 +
       90 +        return (wcslen);
  98   91  }
  99   92  
 100   93  
 101   94  /*
 102   95   * mbtowc
 103   96   *
 104   97   * The mbtowc() function converts a multibyte character mbchar into
 105   98   * a wide character and stores the result in the object pointed to
 106   99   * by wcharp. Up to nbytes bytes are examined.
 107  100   *
 108  101   * If mbchar is NULL, mbtowc() returns zero to indicate that shift
 109  102   * states are not supported.  Shift states are used to switch between
 110  103   * representation modes using reserved bytes to signal shifting
 111  104   * without them being interpreted as characters.  If mbchar is null
 112  105   * mbtowc should return non-zero if the current locale requires shift
  113  106   * states.  Otherwise it should return 0.
 114  107   *
 115  108   * If mbchar is non-null, returns the number of bytes processed in
 116      - * mbchar.  If mbchar is invalid, returns -1.
       109 + * mbchar.  If mbchar points to a null byte, store the null
       110 + * (*wcharp=0) but return length zero.  If mbchar is invalid, returns -1.
 117  111   */
 118  112  int /*ARGSUSED*/
 119      -smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
      113 +smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes)
 120  114  {
 121      -        unsigned char mbyte;
 122      -        smb_wchar_t wide_char;
 123      -        int count;
 124      -        int bytes_left;
      115 +        uint32_t wide_char;
      116 +        int count, err;
      117 +        size_t mblen;
      118 +        size_t wclen;
 125  119  
 126  120          if (mbchar == NULL)
 127  121                  return (0); /* no shift states */
 128  122  
 129      -        /* 0xxxxxxx -> 1 byte ASCII encoding */
 130      -        if (((mbyte = *mbchar++) & 0x80) == 0) {
 131      -                if (wcharp)
 132      -                        *wcharp = (smb_wchar_t)mbyte;
 133      -
 134      -                return (mbyte ? 1 : 0);
 135      -        }
 136      -
 137      -        /* 10xxxxxx -> invalid first byte */
 138      -        if ((mbyte & 0x40) == 0)
      123 +        /*
      124 +         * How many bytes in this symbol?
      125 +         */
      126 +        count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err);
      127 +        if (count < 0)
 139  128                  return (-1);
 140  129  
 141      -        wide_char = mbyte;
 142      -        if ((mbyte & 0x20) == 0) {
 143      -                wide_char &= 0x1f;
 144      -                bytes_left = 1;
 145      -        } else if ((mbyte & 0x10) == 0) {
 146      -                wide_char &= 0x0f;
 147      -                bytes_left = 2;
 148      -        } else {
      130 +        mblen = count;
      131 +        wclen = 1;
      132 +        err = uconv_u8tou32((const uchar_t *)mbchar, &mblen,
      133 +            &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN);
      134 +        if (err != 0)
 149  135                  return (-1);
      136 +        if (wclen == 0) {
      137 +                wide_char = 0;
      138 +                count = 0;
 150  139          }
 151  140  
 152      -        count = 1;
 153      -        while (bytes_left--) {
 154      -                if (((mbyte = *mbchar++) & 0xc0) != 0x80)
 155      -                        return (-1);
 156      -
 157      -                count++;
 158      -                wide_char = (wide_char << 6) | (mbyte & 0x3f);
 159      -        }
 160      -
 161  141          if (wcharp)
 162  142                  *wcharp = wide_char;
 163  143  
 164  144          return (count);
 165  145  }
 166  146  
 167  147  
 168  148  /*
 169  149   * wctomb
 170  150   *
 171  151   * The wctomb() function converts a wide character wchar into a multibyte
 172  152   * character and stores the result in mbchar. The object pointed to by
 173  153   * mbchar must be large enough to accommodate the multibyte character.
 174  154   *
  175  155   * Returns the number of bytes written to mbchar.
      156 + * Note: handles null like any 1-byte char.
 176  157   */
 177  158  int
 178      -smb_wctomb(char *mbchar, smb_wchar_t wchar)
      159 +smb_wctomb(char *mbchar, uint32_t wchar)
 179  160  {
 180      -        if ((wchar & ~0x7f) == 0) {
 181      -                *mbchar = (char)wchar;
 182      -                return (1);
 183      -        }
      161 +        char junk[MTS_MB_CUR_MAX+1];
      162 +        size_t mblen;
      163 +        size_t wclen;
      164 +        int err;
 184  165  
 185      -        if ((wchar & ~0x7ff) == 0) {
 186      -                *mbchar++ = (wchar >> 6) | 0xc0;
 187      -                *mbchar = (wchar & 0x3f) | 0x80;
 188      -                return (2);
 189      -        }
      166 +        if (mbchar == NULL)
      167 +                mbchar = junk;
 190  168  
 191      -        *mbchar++ = (wchar >> 12) | 0xe0;
 192      -        *mbchar++ = ((wchar >> 6) & 0x3f) | 0x80;
 193      -        *mbchar = (wchar & 0x3f) | 0x80;
 194      -        return (3);
      169 +        mblen = MTS_MB_CUR_MAX;
      170 +        wclen = 1;
      171 +        err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen,
      172 +            UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL);
      173 +        if (err != 0)
      174 +                return (-1);
      175 +
      176 +        return ((int)mblen);
 195  177  }
 196  178  
 197  179  
 198  180  /*
 199  181   * wcstombs
 200  182   *
 201  183   * The wcstombs() function converts a wide character string wcstring
 202  184   * into a multibyte character string mbstring. Up to nbytes bytes are
 203  185   * stored in mbstring. Partial multibyte characters at the end of the
 204  186   * string are not stored. The multibyte character string is null
 205  187   * terminated if there is room.
 206  188   *
 207  189   * Returns the number of bytes converted, not counting the terminating
 208      - * null byte.
      190 + * null byte. Returns -1 if an invalid WC sequence is encountered.
 209  191   */
 210  192  size_t
 211      -smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
      193 +smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes)
 212  194  {
 213      -        char *start = mbstring;
 214      -        const smb_wchar_t *wcp = wcstring;
 215      -        smb_wchar_t wide_char = 0;
 216      -        char buf[4];
 217      -        size_t len;
      195 +        size_t mbslen, wcslen;
      196 +        int err;
 218  197  
 219      -        if ((mbstring == NULL) || (wcstring == NULL))
      198 +        /* NULL or empty input is allowed. */
      199 +        if (wcs == NULL || *wcs == 0) {
      200 +                if (mbs != NULL && nbytes > 0)
      201 +                        *mbs = '\0';
 220  202                  return (0);
      203 +        }
 221  204  
 222      -        while (nbytes > MTS_MB_CHAR_MAX) {
 223      -                wide_char = *wcp++;
 224      -                len = smb_wctomb(mbstring, wide_char);
 225      -
 226      -                if (wide_char == 0)
 227      -                        /*LINTED E_PTRDIFF_OVERFLOW*/
 228      -                        return (mbstring - start);
 229      -
 230      -                mbstring += len;
 231      -                nbytes -= len;
      205 +        /*
      206 +         * Traditional wcstombs(3C) allows mbs==NULL to get the length.
       207 +         * SMB never calls it that way, so just fail instead of crashing.
      208 +         */
      209 +        if (mbs == NULL) {
      210 +                return ((size_t)-1);
 232  211          }
 233  212  
 234      -        while (wide_char && nbytes) {
 235      -                wide_char = *wcp++;
 236      -                if ((len = smb_wctomb(buf, wide_char)) > nbytes) {
 237      -                        *mbstring = 0;
 238      -                        break;
 239      -                }
      213 +        /*
      214 +         * Compute wcslen
      215 +         */
      216 +        wcslen = 0;
      217 +        while (wcs[wcslen] != 0)
      218 +                wcslen++;
 240  219  
 241      -                bcopy(buf, mbstring, len);
 242      -                mbstring += len;
 243      -                nbytes -= len;
 244      -        }
      220 +        mbslen = nbytes;
      221 +        err = uconv_u16tou8(wcs, &wcslen,
      222 +            (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN);
      223 +        if (err != 0)
      224 +                return ((size_t)-1);
 245  225  
 246      -        /*LINTED E_PTRDIFF_OVERFLOW*/
 247      -        return (mbstring - start);
      226 +        if (mbslen < nbytes)
      227 +                mbs[mbslen] = '\0';
      228 +
      229 +        return (mbslen);
 248  230  }
 249  231  
 250  232  
 251  233  /*
 252  234   * Returns the number of bytes that would be written if the multi-
 253  235   * byte string mbs was converted to a wide character string, not
 254  236   * counting the terminating null wide character.
 255  237   */
 256  238  size_t
 257  239  smb_wcequiv_strlen(const char *mbs)
 258  240  {
 259      -        smb_wchar_t     wide_char;
      241 +        uint32_t        wide_char;
 260  242          size_t bytes;
 261  243          size_t len = 0;
 262  244  
 263  245          while (*mbs) {
 264  246                  bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
 265  247                  if (bytes == ((size_t)-1))
 266  248                          return ((size_t)-1);
      249 +                mbs += bytes;
 267  250  
 268  251                  len += sizeof (smb_wchar_t);
 269      -                mbs += bytes;
      252 +                if (bytes > 3) {
      253 +                        /*
      254 +                         * Extended unicode, so TWO smb_wchar_t
      255 +                         */
      256 +                        len += sizeof (smb_wchar_t);
      257 +                }
 270  258          }
 271  259  
 272  260          return (len);
 273  261  }
 274  262  
 275  263  
 276  264  /*
 277  265   * Returns the number of bytes that would be written if the multi-
 278      - * byte string mbs was converted to a single byte character string,
 279      - * not counting the terminating null character.
      266 + * byte string mbs was converted to an OEM character string,
      267 + * (smb_mbstooem) not counting the terminating null character.
 280  268   */
 281  269  size_t
 282  270  smb_sbequiv_strlen(const char *mbs)
 283  271  {
 284      -        smb_wchar_t     wide_char;
 285  272          size_t nbytes;
 286  273          size_t len = 0;
 287  274  
 288  275          while (*mbs) {
 289      -                nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
      276 +                nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX);
 290  277                  if (nbytes == ((size_t)-1))
 291  278                          return ((size_t)-1);
      279 +                if (nbytes == 0)
      280 +                        break;
 292  281  
 293      -                if (wide_char & 0xFF00)
 294      -                        len += sizeof (smb_wchar_t);
 295      -                else
 296      -                        ++len;
      282 +                if (nbytes == 1) {
      283 +                        /* ASCII */
      284 +                        len++;
      285 +                } else if (nbytes < 8) {
      286 +                        /* Compute OEM length */
      287 +                        char mbsbuf[8];
      288 +                        uint8_t oembuf[8];
      289 +                        int oemlen;
      290 +                        (void) strlcpy(mbsbuf, mbs, nbytes+1);
      291 +                        oemlen = smb_mbstooem(oembuf, mbsbuf, 8);
      292 +                        if (oemlen < 0)
      293 +                                return ((size_t)-1);
      294 +                        len += oemlen;
      295 +                } else {
      296 +                        return ((size_t)-1);
      297 +                }
 297  298  
 298  299                  mbs += nbytes;
 299  300          }
 300  301  
 301  302          return (len);
 302  303  }
 303  304  
      305 +/*
      306 + * Convert OEM strings to/from internal (UTF-8) form.
      307 + *
      308 + * We rarely encounter these anymore because all modern
      309 + * SMB clients use Unicode (UTF-16). The few cases where
      310 + * this IS still called are normally using ASCII, i.e.
      311 + * tag names etc. so short-cut those cases.  If we get
      312 + * something non-ASCII we have to call iconv.
      313 + *
      314 + * If we were to really support OEM code pages, we would
      315 + * need to have a way to set the OEM code page from some
      316 + * configuration value.  For now it's always CP850.
      317 + * See also ./smb_oem.c
      318 + */
      319 +static char smb_oem_codepage[32] = "CP850";
 304  320  
 305  321  /*
 306  322   * stombs
 307  323   *
 308      - * Convert a regular null terminated string 'string' to a UTF-8 encoded
 309      - * null terminated multi-byte string 'mbstring'. Only full converted
 310      - * UTF-8 characters will be written 'mbstring'. If a character will not
 311      - * fit within the remaining buffer space or 'mbstring' will overflow
 312      - * max_mblen, the conversion process will be terminated and 'mbstring'
 313      - * will be null terminated.
      324 + * Convert a null terminated OEM string 'string' to a UTF-8 string
      325 + * no longer than max_mblen (null terminated if space).
 314  326   *
 315      - * Returns the number of bytes written to 'mbstring', excluding the
 316      - * terminating null character.
      327 + * If the input string contains invalid OEM characters, a value
      328 + * of -1 will be returned. Otherwise returns the length of 'mbs',
      329 + * excluding the terminating null character.
 317  330   *
  318  331   * If either mbs or oems is a null pointer, -1 is returned.
 319  332   */
 320  333  int
 321      -smb_stombs(char *mbstring, char *string, int max_mblen)
      334 +smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen)
 322  335  {
 323      -        char *start = mbstring;
 324      -        unsigned char *p = (unsigned char *)string;
 325      -        int space_left = max_mblen;
 326      -        int     len;
 327      -        smb_wchar_t     wide_char;
 328      -        char buf[4];
      336 +        uchar_t *p;
      337 +        int     oemlen;
      338 +        int     rlen;
      339 +        boolean_t need_iconv = B_FALSE;
 329  340  
 330      -        if (!mbstring || !string)
      341 +        if (mbs == NULL || oems == NULL)
 331  342                  return (-1);
 332  343  
 333      -        while (*p && space_left > 2) {
 334      -                wide_char = *p++;
 335      -                len = smb_wctomb(mbstring, wide_char);
 336      -                mbstring += len;
 337      -                space_left -= len;
      344 +        /*
      345 +         * Check if the oems is all ASCII (and get the length
      346 +         * while we're at it) so we know if we need to iconv.
      347 +         * We usually can avoid the iconv calls.
      348 +         */
      349 +        oemlen = 0;
      350 +        p = (uchar_t *)oems;
      351 +        while (*p != '\0') {
      352 +                oemlen++;
      353 +                if (*p & 0x80)
      354 +                        need_iconv = B_TRUE;
      355 +                p++;
 338  356          }
 339  357  
 340      -        if (*p) {
 341      -                wide_char = *p;
 342      -                if ((len = smb_wctomb(buf, wide_char)) < 2) {
 343      -                        *mbstring = *buf;
 344      -                        mbstring += len;
 345      -                        space_left -= len;
 346      -                }
      358 +        if (need_iconv) {
      359 +                int     rc;
      360 +                char    *obuf = mbs;
      361 +                size_t  olen = max_mblen;
      362 +                size_t  ilen = oemlen;
      363 +#if defined(_KERNEL) || defined(_FAKE_KERNEL)
      364 +                char *ibuf = (char *)oems;
      365 +                kiconv_t ic;
      366 +                int     err;
      367 +
      368 +                ic = kiconv_open("UTF-8", smb_oem_codepage);
      369 +                if (ic == (kiconv_t)-1)
      370 +                        goto just_copy;
      371 +                rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
      372 +                (void) kiconv_close(ic);
      373 +#else   /* _KERNEL || _FAKE_KERNEL */
      374 +                const char *ibuf = (char *)oems;
      375 +                iconv_t ic;
      376 +                ic = iconv_open("UTF-8", smb_oem_codepage);
      377 +                if (ic == (iconv_t)-1)
      378 +                        goto just_copy;
      379 +                rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
      380 +                (void) iconv_close(ic);
      381 +#endif  /* _KERNEL || _FAKE_KERNEL */
      382 +                if (rc < 0)
      383 +                        return (-1);
      384 +                /* Return val. is output bytes. */
      385 +                rlen = (max_mblen - olen);
      386 +        } else {
      387 +        just_copy:
      388 +                rlen = oemlen;
      389 +                if (rlen > max_mblen)
      390 +                        rlen = max_mblen;
      391 +                bcopy(oems, mbs, rlen);
 347  392          }
      393 +        if (rlen < max_mblen)
      394 +                mbs[rlen] = '\0';
 348  395  
 349      -        *mbstring = '\0';
 350      -
 351      -        /*LINTED E_PTRDIFF_OVERFLOW*/
 352      -        return (mbstring - start);
      396 +        return (rlen);
 353  397  }
 354  398  
 355  399  
 356  400  /*
 357  401   * mbstos
 358  402   *
 359      - * Convert a null terminated multi-byte string 'mbstring' to a regular
 360      - * null terminated string 'string'.  A 1-byte character in 'mbstring'
 361      - * maps to a 1-byte character in 'string'. A 2-byte character in
 362      - * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null.
 363      - * Otherwise the upper byte null will be discarded to ensure that the
 364      - * output stream does not contain embedded null characters.
      403 + * Convert a null terminated multi-byte string 'mbs' to an OEM string
      404 + * no longer than max_oemlen (null terminated if space).
 365  405   *
 366      - * If the input stream contains invalid multi-byte characters, a value
 367      - * of -1 will be returned. Otherwise the length of 'string', excluding
 368      - * the terminating null character, is returned.
      406 + * If the input string contains invalid multi-byte characters, a value
      407 + * of -1 will be returned. Otherwise returns the length of 'oems',
      408 + * excluding the terminating null character.
 369  409   *
  370  410   * If either oems or mbs is a null pointer, -1 is returned.
 371  411   */
 372  412  int
 373      -smb_mbstos(char *string, const char *mbstring)
      413 +smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen)
 374  414  {
 375      -        smb_wchar_t wc;
 376      -        unsigned char *start = (unsigned char *)string;
 377      -        int len;
      415 +        uchar_t *p;
      416 +        int     mbslen;
      417 +        int     rlen;
      418 +        boolean_t need_iconv = B_FALSE;
 378  419  
 379      -        if (string == NULL || mbstring == NULL)
      420 +        if (oems == NULL || mbs == NULL)
 380  421                  return (-1);
 381  422  
 382      -        while (*mbstring) {
 383      -                if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) {
 384      -                        *string = 0;
 385      -                        return (-1);
 386      -                }
      423 +        /*
      424 +         * Check if the mbs is all ASCII (and get the length
      425 +         * while we're at it) so we know if we need to iconv.
      426 +         * We usually can avoid the iconv calls.
      427 +         */
      428 +        mbslen = 0;
      429 +        p = (uchar_t *)mbs;
      430 +        while (*p != '\0') {
      431 +                mbslen++;
      432 +                if (*p & 0x80)
      433 +                        need_iconv = B_TRUE;
      434 +                p++;
      435 +        }
 387  436  
 388      -                if (wc & 0xFF00) {
 389      -                        /*LINTED E_BAD_PTR_CAST_ALIGN*/
 390      -                        *((smb_wchar_t *)string) = wc;
 391      -                        string += sizeof (smb_wchar_t);
 392      -                }
 393      -                else
 394      -                {
 395      -                        *string = (unsigned char)wc;
 396      -                        string++;
 397      -                }
      437 +        if (need_iconv) {
      438 +                int     rc;
      439 +                char    *obuf = (char *)oems;
      440 +                size_t  olen = max_oemlen;
      441 +                size_t  ilen = mbslen;
      442 +#if defined(_KERNEL) || defined(_FAKE_KERNEL)
      443 +                char *ibuf = (char *)mbs;
      444 +                kiconv_t ic;
      445 +                int     err;
 398  446  
 399      -                mbstring += len;
      447 +                ic = kiconv_open(smb_oem_codepage, "UTF-8");
      448 +                if (ic == (kiconv_t)-1)
      449 +                        goto just_copy;
      450 +                rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
      451 +                (void) kiconv_close(ic);
      452 +#else   /* _KERNEL || _FAKE_KERNEL */
      453 +                const char *ibuf = mbs;
      454 +                iconv_t ic;
      455 +                ic = iconv_open(smb_oem_codepage, "UTF-8");
      456 +                if (ic == (iconv_t)-1)
      457 +                        goto just_copy;
      458 +                rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
      459 +                (void) iconv_close(ic);
      460 +#endif  /* _KERNEL || _FAKE_KERNEL */
      461 +                if (rc < 0)
      462 +                        return (-1);
      463 +                /* Return val. is output bytes. */
      464 +                rlen = (max_oemlen - olen);
      465 +        } else {
      466 +        just_copy:
      467 +                rlen = mbslen;
      468 +                if (rlen > max_oemlen)
      469 +                        rlen = max_oemlen;
      470 +                bcopy(mbs, oems, rlen);
 400  471          }
      472 +        if (rlen < max_oemlen)
      473 +                oems[rlen] = '\0';
 401  474  
 402      -        *string = 0;
 403      -
 404      -        /*LINTED E_PTRDIFF_OVERFLOW*/
 405      -        return ((unsigned char *)string - start);
      475 +        return (rlen);
 406  476  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX