Print this page
NEX-19712 SMB directory listings sometimes wrong after NEX-19025
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
NEX-19025 CIFS gets confused with filenames containing enhanced Unicode
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
and: (fix build, check-rtime)
NEX-4458 Incorrect directory listing response for non-UNICODE clients
Reviewed by: Matt Barden <Matt.Barden@nexenta.com>
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
NEX-2460 libfksmbd should not link with libsmb
SMB-50 User-mode SMB server
Includes work by these authors:
Thomas Keiser <thomas.keiser@nexenta.com>
Albert Lee <trisk@nexenta.com>
*** 20,66 ****
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
! * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
/*
! * Multibyte/wide-char conversion routines. Wide-char encoding provides
! * a fixed size character encoding that maps to the Unicode 16-bit
! * (UCS-2) character set standard. Multibyte or UCS transformation
! * format (UTF) encoding is a variable length character encoding scheme
! * that is compatible with existing ASCII characters and guarantees that
! * the resultant strings do not contain embedded null characters. Both
! * types of encoding provide a null terminator: single byte for UTF-8
! * and a wide-char null for Unicode. See RFC 2044.
! *
! * The table below illustrates the UTF-8 encoding scheme. The letter x
! * indicates bits available for encoding the character value.
! *
! * UCS-2 UTF-8 octet sequence (binary)
! * 0x0000-0x007F 0xxxxxxx
! * 0x0080-0x07FF 110xxxxx 10xxxxxx
! * 0x0800-0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
! *
! * RFC 2044
! * UTF-8,a transformation format of UNICODE and ISO 10646
! * F. Yergeau
! * Alis Technologies
! * October 1996
*/
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/types.h>
#include <sys/sunddi.h>
! #else
#include <stdio.h>
#include <stdlib.h>
- #include <assert.h>
#include <strings.h>
! #endif
#include <smbsrv/string.h>
/*
* mbstowcs
--- 20,48 ----
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
! * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
! * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire
! * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs).
*/
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/types.h>
#include <sys/sunddi.h>
! #else /* _KERNEL || _FAKE_KERNEL */
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
! #include <iconv.h>
! #include <assert.h>
! #endif /* _KERNEL || _FAKE_KERNEL */
! #include <sys/u8_textprep.h>
#include <smbsrv/string.h>
/*
* mbstowcs
*** 73,102 ****
* Returns the number of wide characters converted, not counting
* any terminating null wide character. Returns -1 if an invalid
* multibyte character is encountered.
*/
size_t
! smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars)
{
! int len;
! smb_wchar_t *start = wcstring;
! while (nwchars--) {
! len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX);
! if (len < 0) {
! *wcstring = 0;
return ((size_t)-1);
}
! if (*mbstring == 0)
! break;
! ++wcstring;
! mbstring += len;
! }
! return (wcstring - start);
}
/*
* mbtowc
--- 55,95 ----
* Returns the number of wide characters converted, not counting
* any terminating null wide character. Returns -1 if an invalid
* multibyte character is encountered.
*/
size_t
! smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars)
{
! size_t mbslen, wcslen;
! int err;
! /* NULL or empty input is allowed. */
! if (mbs == NULL || *mbs == '\0') {
! if (wcs != NULL && nwchars > 0)
! *wcs = 0;
! return (0);
! }
!
! /*
! * Traditional mbstowcs(3C) allows wcs==NULL to get the length.
! * SMB never calls it that way; we don't support it, so return -1.
! */
! if (wcs == NULL) {
return ((size_t)-1);
}
! mbslen = strlen(mbs);
! wcslen = nwchars;
! err = uconv_u8tou16((const uchar_t *)mbs, &mbslen,
! wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN);
! if (err != 0)
! return ((size_t)-1);
! if (wcslen < nwchars)
! wcs[wcslen] = 0;
! return (wcslen);
}
/*
* mbtowc
*** 111,165 ****
* without them being interpreted as characters. If mbchar is null
* mbtowc should return non-zero if the current locale requires shift
* states. Otherwise it should return 0.
*
* If mbchar is non-null, returns the number of bytes processed in
! * mbchar. If mbchar is invalid, returns -1.
*/
int /*ARGSUSED*/
! smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes)
{
! unsigned char mbyte;
! smb_wchar_t wide_char;
! int count;
! int bytes_left;
if (mbchar == NULL)
return (0); /* no shift states */
! /* 0xxxxxxx -> 1 byte ASCII encoding */
! if (((mbyte = *mbchar++) & 0x80) == 0) {
! if (wcharp)
! *wcharp = (smb_wchar_t)mbyte;
!
! return (mbyte ? 1 : 0);
! }
!
! /* 10xxxxxx -> invalid first byte */
! if ((mbyte & 0x40) == 0)
return (-1);
! wide_char = mbyte;
! if ((mbyte & 0x20) == 0) {
! wide_char &= 0x1f;
! bytes_left = 1;
! } else if ((mbyte & 0x10) == 0) {
! wide_char &= 0x0f;
! bytes_left = 2;
! } else {
return (-1);
}
- count = 1;
- while (bytes_left--) {
- if (((mbyte = *mbchar++) & 0xc0) != 0x80)
- return (-1);
-
- count++;
- wide_char = (wide_char << 6) | (mbyte & 0x3f);
- }
-
if (wcharp)
*wcharp = wide_char;
return (count);
}
--- 104,145 ----
* without them being interpreted as characters. If mbchar is null
* mbtowc should return non-zero if the current locale requires shift
* states. Otherwise it should return 0.
*
* If mbchar is non-null, returns the number of bytes processed in
! * mbchar. If mbchar points to a null byte, convert the null
! * (*wcharp = 0) but return length zero. If invalid, returns -1.
*/
int /*ARGSUSED*/
! smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes)
{
! uint32_t wide_char;
! int count, err;
! size_t mblen;
! size_t wclen;
if (mbchar == NULL)
return (0); /* no shift states */
! /*
! * How many bytes in this symbol?
! */
! count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err);
! if (count < 0)
return (-1);
! mblen = count;
! wclen = 1;
! err = uconv_u8tou32((const uchar_t *)mbchar, &mblen,
! &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN);
! if (err != 0)
return (-1);
+ if (wclen == 0) {
+ wide_char = 0;
+ count = 0;
}
if (wcharp)
*wcharp = wide_char;
return (count);
}
*** 171,199 ****
* The wctomb() function converts a wide character wchar into a multibyte
* character and stores the result in mbchar. The object pointed to by
* mbchar must be large enough to accommodate the multibyte character.
*
* Returns the number of bytes written to mbchar.
*/
int
! smb_wctomb(char *mbchar, smb_wchar_t wchar)
{
! if ((wchar & ~0x7f) == 0) {
! *mbchar = (char)wchar;
! return (1);
! }
! if ((wchar & ~0x7ff) == 0) {
! *mbchar++ = (wchar >> 6) | 0xc0;
! *mbchar = (wchar & 0x3f) | 0x80;
! return (2);
! }
! *mbchar++ = (wchar >> 12) | 0xe0;
! *mbchar++ = ((wchar >> 6) & 0x3f) | 0x80;
! *mbchar = (wchar & 0x3f) | 0x80;
! return (3);
}
/*
* wcstombs
--- 151,181 ----
* The wctomb() function converts a wide character wchar into a multibyte
* character and stores the result in mbchar. The object pointed to by
* mbchar must be large enough to accommodate the multibyte character.
*
* Returns the number of bytes written to mbchar.
+ * Note: handles null like any 1-byte char.
*/
int
! smb_wctomb(char *mbchar, uint32_t wchar)
{
! char junk[MTS_MB_CUR_MAX+1];
! size_t mblen;
! size_t wclen;
! int err;
! if (mbchar == NULL)
! mbchar = junk;
! mblen = MTS_MB_CUR_MAX;
! wclen = 1;
! err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen,
! UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL);
! if (err != 0)
! return (-1);
!
! return ((int)mblen);
}
/*
* wcstombs
*** 203,252 ****
* stored in mbstring. Partial multibyte characters at the end of the
* string are not stored. The multibyte character string is null
* terminated if there is room.
*
* Returns the number of bytes converted, not counting the terminating
! * null byte.
*/
size_t
! smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes)
{
! char *start = mbstring;
! const smb_wchar_t *wcp = wcstring;
! smb_wchar_t wide_char = 0;
! char buf[4];
! size_t len;
! if ((mbstring == NULL) || (wcstring == NULL))
return (0);
-
- while (nbytes > MTS_MB_CHAR_MAX) {
- wide_char = *wcp++;
- len = smb_wctomb(mbstring, wide_char);
-
- if (wide_char == 0)
- /*LINTED E_PTRDIFF_OVERFLOW*/
- return (mbstring - start);
-
- mbstring += len;
- nbytes -= len;
}
! while (wide_char && nbytes) {
! wide_char = *wcp++;
! if ((len = smb_wctomb(buf, wide_char)) > nbytes) {
! *mbstring = 0;
! break;
}
! bcopy(buf, mbstring, len);
! mbstring += len;
! nbytes -= len;
! }
! /*LINTED E_PTRDIFF_OVERFLOW*/
! return (mbstring - start);
}
/*
* Returns the number of bytes that would be written if the multi-
--- 185,234 ----
* stored in mbstring. Partial multibyte characters at the end of the
* string are not stored. The multibyte character string is null
* terminated if there is room.
*
* Returns the number of bytes converted, not counting the terminating
! * null byte. Returns -1 if an invalid WC sequence is encountered.
*/
size_t
! smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes)
{
! size_t mbslen, wcslen;
! int err;
! /* NULL or empty input is allowed. */
! if (wcs == NULL || *wcs == 0) {
! if (mbs != NULL && nbytes > 0)
! *mbs = '\0';
return (0);
}
! /*
! * Traditional wcstombs(3C) allows mbs==NULL to get the length.
! * SMB never calls it that way; we don't support it, so return -1.
! */
! if (mbs == NULL) {
! return ((size_t)-1);
}
! /*
! * Compute wcslen
! */
! wcslen = 0;
! while (wcs[wcslen] != 0)
! wcslen++;
! mbslen = nbytes;
! err = uconv_u16tou8(wcs, &wcslen,
! (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN);
! if (err != 0)
! return ((size_t)-1);
!
! if (mbslen < nbytes)
! mbs[mbslen] = '\0';
!
! return (mbslen);
}
/*
* Returns the number of bytes that would be written if the multi-
*** 254,406 ****
* counting the terminating null wide character.
*/
size_t
smb_wcequiv_strlen(const char *mbs)
{
! smb_wchar_t wide_char;
size_t bytes;
size_t len = 0;
while (*mbs) {
bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
if (bytes == ((size_t)-1))
return ((size_t)-1);
len += sizeof (smb_wchar_t);
! mbs += bytes;
}
return (len);
}
/*
* Returns the number of bytes that would be written if the multi-
! * byte string mbs was converted to a single byte character string,
! * not counting the terminating null character.
*/
size_t
smb_sbequiv_strlen(const char *mbs)
{
- smb_wchar_t wide_char;
size_t nbytes;
size_t len = 0;
while (*mbs) {
! nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
if (nbytes == ((size_t)-1))
return ((size_t)-1);
! if (wide_char & 0xFF00)
! len += sizeof (smb_wchar_t);
! else
! ++len;
mbs += nbytes;
}
return (len);
}
/*
* stombs
*
! * Convert a regular null terminated string 'string' to a UTF-8 encoded
! * null terminated multi-byte string 'mbstring'. Only full converted
! * UTF-8 characters will be written 'mbstring'. If a character will not
! * fit within the remaining buffer space or 'mbstring' will overflow
! * max_mblen, the conversion process will be terminated and 'mbstring'
! * will be null terminated.
*
! * Returns the number of bytes written to 'mbstring', excluding the
! * terminating null character.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
! smb_stombs(char *mbstring, char *string, int max_mblen)
{
! char *start = mbstring;
! unsigned char *p = (unsigned char *)string;
! int space_left = max_mblen;
! int len;
! smb_wchar_t wide_char;
! char buf[4];
! if (!mbstring || !string)
return (-1);
! while (*p && space_left > 2) {
! wide_char = *p++;
! len = smb_wctomb(mbstring, wide_char);
! mbstring += len;
! space_left -= len;
}
! if (*p) {
! wide_char = *p;
! if ((len = smb_wctomb(buf, wide_char)) < 2) {
! *mbstring = *buf;
! mbstring += len;
! space_left -= len;
}
! }
! *mbstring = '\0';
!
! /*LINTED E_PTRDIFF_OVERFLOW*/
! return (mbstring - start);
}
/*
* mbstos
*
! * Convert a null terminated multi-byte string 'mbstring' to a regular
! * null terminated string 'string'. A 1-byte character in 'mbstring'
! * maps to a 1-byte character in 'string'. A 2-byte character in
! * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null.
! * Otherwise the upper byte null will be discarded to ensure that the
! * output stream does not contain embedded null characters.
*
! * If the input stream contains invalid multi-byte characters, a value
! * of -1 will be returned. Otherwise the length of 'string', excluding
! * the terminating null character, is returned.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
! smb_mbstos(char *string, const char *mbstring)
{
! smb_wchar_t wc;
! unsigned char *start = (unsigned char *)string;
! int len;
! if (string == NULL || mbstring == NULL)
return (-1);
! while (*mbstring) {
! if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) {
! *string = 0;
! return (-1);
}
! if (wc & 0xFF00) {
! /*LINTED E_BAD_PTR_CAST_ALIGN*/
! *((smb_wchar_t *)string) = wc;
! string += sizeof (smb_wchar_t);
! }
! else
! {
! *string = (unsigned char)wc;
! string++;
! }
! mbstring += len;
}
! *string = 0;
!
! /*LINTED E_PTRDIFF_OVERFLOW*/
! return ((unsigned char *)string - start);
}
--- 236,476 ----
* counting the terminating null wide character.
*/
size_t
smb_wcequiv_strlen(const char *mbs)
{
! uint32_t wide_char;
size_t bytes;
size_t len = 0;
while (*mbs) {
bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
if (bytes == ((size_t)-1))
return ((size_t)-1);
+ mbs += bytes;
len += sizeof (smb_wchar_t);
! if (bytes > 3) {
! /*
! * Extended unicode, so TWO smb_wchar_t
! */
! len += sizeof (smb_wchar_t);
}
+ }
return (len);
}
/*
* Returns the number of bytes that would be written if the multi-
! * byte string mbs was converted to an OEM character string
! * (see smb_mbstooem), not counting the terminating null character.
*/
size_t
smb_sbequiv_strlen(const char *mbs)
{
size_t nbytes;
size_t len = 0;
while (*mbs) {
! nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX);
if (nbytes == ((size_t)-1))
return ((size_t)-1);
+ if (nbytes == 0)
+ break;
! if (nbytes == 1) {
! /* ASCII */
! len++;
! } else if (nbytes < 8) {
! /* Compute OEM length */
! char mbsbuf[8];
! uint8_t oembuf[8];
! int oemlen;
! (void) strlcpy(mbsbuf, mbs, nbytes+1);
! oemlen = smb_mbstooem(oembuf, mbsbuf, 8);
! if (oemlen < 0)
! return ((size_t)-1);
! len += oemlen;
! } else {
! return ((size_t)-1);
! }
mbs += nbytes;
}
return (len);
}
+ /*
+ * Convert OEM strings to/from internal (UTF-8) form.
+ *
+ * We rarely encounter these anymore because all modern
+ * SMB clients use Unicode (UTF-16). The few cases where
+ * this IS still called are normally using ASCII, i.e.
+ * tag names etc. so short-cut those cases. If we get
+ * something non-ASCII we have to call iconv.
+ *
+ * If we were to really support OEM code pages, we would
+ * need to have a way to set the OEM code page from some
+ * configuration value. For now it's always CP850.
+ * See also ./smb_oem.c
+ */
+ static char smb_oem_codepage[32] = "CP850";
/*
* oemtombs
*
! * Convert a null terminated OEM string 'oems' to a UTF-8 string
! * 'mbs' no longer than max_mblen (null terminated if there is room).
*
! * If the input string contains invalid OEM characters, a value
! * of -1 will be returned. Otherwise returns the length of 'mbs',
! * excluding the terminating null character.
*
* If either mbs or oems is a null pointer, -1 is returned.
*/
int
! smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen)
{
! uchar_t *p;
! int oemlen;
! int rlen;
! boolean_t need_iconv = B_FALSE;
! if (mbs == NULL || oems == NULL)
return (-1);
! /*
! * Check if the oems is all ASCII (and get the length
! * while we're at it) so we know if we need to iconv.
! * We usually can avoid the iconv calls.
! */
! oemlen = 0;
! p = (uchar_t *)oems;
! while (*p != '\0') {
! oemlen++;
! if (*p & 0x80)
! need_iconv = B_TRUE;
! p++;
}
! if (need_iconv) {
! int rc;
! char *obuf = mbs;
! size_t olen = max_mblen;
! size_t ilen = oemlen;
! #if defined(_KERNEL) || defined(_FAKE_KERNEL)
! char *ibuf = (char *)oems;
! kiconv_t ic;
! int err;
!
! ic = kiconv_open("UTF-8", smb_oem_codepage);
! if (ic == (kiconv_t)-1)
! goto just_copy;
! rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
! (void) kiconv_close(ic);
! #else /* _KERNEL || _FAKE_KERNEL */
! const char *ibuf = (char *)oems;
! iconv_t ic;
! ic = iconv_open("UTF-8", smb_oem_codepage);
! if (ic == (iconv_t)-1)
! goto just_copy;
! rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
! (void) iconv_close(ic);
! #endif /* _KERNEL || _FAKE_KERNEL */
! if (rc < 0)
! return (-1);
! /* Return val. is output bytes. */
! rlen = (max_mblen - olen);
! } else {
! just_copy:
! rlen = oemlen;
! if (rlen > max_mblen)
! rlen = max_mblen;
! bcopy(oems, mbs, rlen);
}
! if (rlen < max_mblen)
! mbs[rlen] = '\0';
! return (rlen);
}
/*
* mbstooem
*
! * Convert a null terminated multi-byte string 'mbs' to an OEM string
! * 'oems' no longer than max_oemlen (null terminated if there is room).
*
! * If the input string contains invalid multi-byte characters, a value
! * of -1 will be returned. Otherwise returns the length of 'oems',
! * excluding the terminating null character.
*
* If either oems or mbs is a null pointer, -1 is returned.
*/
int
! smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen)
{
! uchar_t *p;
! int mbslen;
! int rlen;
! boolean_t need_iconv = B_FALSE;
! if (oems == NULL || mbs == NULL)
return (-1);
! /*
! * Check if the mbs is all ASCII (and get the length
! * while we're at it) so we know if we need to iconv.
! * We usually can avoid the iconv calls.
! */
! mbslen = 0;
! p = (uchar_t *)mbs;
! while (*p != '\0') {
! mbslen++;
! if (*p & 0x80)
! need_iconv = B_TRUE;
! p++;
}
! if (need_iconv) {
! int rc;
! char *obuf = (char *)oems;
! size_t olen = max_oemlen;
! size_t ilen = mbslen;
! #if defined(_KERNEL) || defined(_FAKE_KERNEL)
! char *ibuf = (char *)mbs;
! kiconv_t ic;
! int err;
! ic = kiconv_open(smb_oem_codepage, "UTF-8");
! if (ic == (kiconv_t)-1)
! goto just_copy;
! rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
! (void) kiconv_close(ic);
! #else /* _KERNEL || _FAKE_KERNEL */
! const char *ibuf = mbs;
! iconv_t ic;
! ic = iconv_open(smb_oem_codepage, "UTF-8");
! if (ic == (iconv_t)-1)
! goto just_copy;
! rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
! (void) iconv_close(ic);
! #endif /* _KERNEL || _FAKE_KERNEL */
! if (rc < 0)
! return (-1);
! /* Return val. is output bytes. */
! rlen = (max_oemlen - olen);
! } else {
! just_copy:
! rlen = mbslen;
! if (rlen > max_oemlen)
! rlen = max_oemlen;
! bcopy(mbs, oems, rlen);
}
+ if (rlen < max_oemlen)
+ oems[rlen] = '\0';
! return (rlen);
}