Print this page
NEX-19712 SMB directory listings sometimes wrong after NEX-19025
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
NEX-19025 CIFS gets confused with filenames containing enhanced Unicode
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
and: (fix build, check-rtime)
NEX-4458 Incorrect directory listing response for non-UNICODE clients
Reviewed by: Matt Barden <Matt.Barden@nexenta.com>
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
NEX-2460 libfksmbd should not link with libsmb
SMB-50 User-mode SMB server
 Includes work by these authors:
 Thomas Keiser <thomas.keiser@nexenta.com>
 Albert Lee <trisk@nexenta.com>

*** 20,66 **** */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * ! * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ /* ! * Multibyte/wide-char conversion routines. Wide-char encoding provides ! * a fixed size character encoding that maps to the Unicode 16-bit ! * (UCS-2) character set standard. Multibyte or UCS transformation ! * format (UTF) encoding is a variable length character encoding scheme ! * that s compatible with existing ASCII characters and guarantees that ! * the resultant strings do not contain embedded null characters. Both ! * types of encoding provide a null terminator: single byte for UTF-8 ! * and a wide-char null for Unicode. See RFC 2044. ! * ! * The table below illustrates the UTF-8 encoding scheme. The letter x ! * indicates bits available for encoding the character value. ! * ! * UCS-2 UTF-8 octet sequence (binary) ! * 0x0000-0x007F 0xxxxxxx ! * 0x0080-0x07FF 110xxxxx 10xxxxxx ! * 0x0800-0xFFFF 1110xxxx 10xxxxxx 10xxxxxx ! * ! * RFC 2044 ! * UTF-8,a transformation format of UNICODE and ISO 10646 ! * F. Yergeau ! * Alis Technologies ! * October 1996 */ #if defined(_KERNEL) || defined(_FAKE_KERNEL) #include <sys/types.h> #include <sys/sunddi.h> ! #else #include <stdio.h> #include <stdlib.h> - #include <assert.h> #include <strings.h> ! #endif #include <smbsrv/string.h> /* * mbstowcs --- 20,48 ---- */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * ! * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* ! * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire ! * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs). */ #if defined(_KERNEL) || defined(_FAKE_KERNEL) #include <sys/types.h> #include <sys/sunddi.h> ! #else /* _KERNEL || _FAKE_KERNEL */ #include <stdio.h> #include <stdlib.h> #include <strings.h> ! #include <iconv.h> ! #include <assert.h> ! 
#endif /* _KERNEL || _FAKE_KERNEL */ ! #include <sys/u8_textprep.h> #include <smbsrv/string.h> /* * mbstowcs
*** 73,102 **** * Returns the number of wide characters converted, not counting * any terminating null wide character. Returns -1 if an invalid * multibyte character is encountered. */ size_t ! smb_mbstowcs(smb_wchar_t *wcstring, const char *mbstring, size_t nwchars) { ! int len; ! smb_wchar_t *start = wcstring; ! while (nwchars--) { ! len = smb_mbtowc(wcstring, mbstring, MTS_MB_CHAR_MAX); ! if (len < 0) { ! *wcstring = 0; return ((size_t)-1); } ! if (*mbstring == 0) ! break; ! ++wcstring; ! mbstring += len; ! } ! return (wcstring - start); } /* * mbtowc --- 55,95 ---- * Returns the number of wide characters converted, not counting * any terminating null wide character. Returns -1 if an invalid * multibyte character is encountered. */ size_t ! smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars) { ! size_t mbslen, wcslen; ! int err; ! /* NULL or empty input is allowed. */ ! if (mbs == NULL || *mbs == '\0') { ! if (wcs != NULL && nwchars > 0) ! *wcs = 0; ! return (0); ! } ! ! /* ! * Traditional mbstowcs(3C) allows wcs==NULL to get the length. ! * SMB never calls it that way, but let's future-proof. ! */ ! if (wcs == NULL) { return ((size_t)-1); } ! mbslen = strlen(mbs); ! wcslen = nwchars; ! err = uconv_u8tou16((const uchar_t *)mbs, &mbslen, ! wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN); ! if (err != 0) ! return ((size_t)-1); ! if (wcslen < nwchars) ! wcs[wcslen] = 0; ! return (wcslen); } /* * mbtowc
*** 111,165 **** * without them being interpreted as characters. If mbchar is null * mbtowc should return non-zero if the current locale requires shift * states. Otherwise it should return 0. * * If mbchar is non-null, returns the number of bytes processed in ! * mbchar. If mbchar is invalid, returns -1. */ int /*ARGSUSED*/ ! smb_mbtowc(smb_wchar_t *wcharp, const char *mbchar, size_t nbytes) { ! unsigned char mbyte; ! smb_wchar_t wide_char; ! int count; ! int bytes_left; if (mbchar == NULL) return (0); /* no shift states */ ! /* 0xxxxxxx -> 1 byte ASCII encoding */ ! if (((mbyte = *mbchar++) & 0x80) == 0) { ! if (wcharp) ! *wcharp = (smb_wchar_t)mbyte; ! ! return (mbyte ? 1 : 0); ! } ! ! /* 10xxxxxx -> invalid first byte */ ! if ((mbyte & 0x40) == 0) return (-1); ! wide_char = mbyte; ! if ((mbyte & 0x20) == 0) { ! wide_char &= 0x1f; ! bytes_left = 1; ! } else if ((mbyte & 0x10) == 0) { ! wide_char &= 0x0f; ! bytes_left = 2; ! } else { return (-1); } - count = 1; - while (bytes_left--) { - if (((mbyte = *mbchar++) & 0xc0) != 0x80) - return (-1); - - count++; - wide_char = (wide_char << 6) | (mbyte & 0x3f); - } - if (wcharp) *wcharp = wide_char; return (count); } --- 104,145 ---- * without them being interpreted as characters. If mbchar is null * mbtowc should return non-zero if the current locale requires shift * states. Otherwise it should return 0. * * If mbchar is non-null, returns the number of bytes processed in ! * mbchar. If mbchar points to a null byte, convert the null (*wcharp=0) but ! * return length zero. If mbchar is invalid, returns -1. */ int /*ARGSUSED*/ ! smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes) { ! uint32_t wide_char; ! int count, err; ! size_t mblen; ! size_t wclen; if (mbchar == NULL) return (0); /* no shift states */ ! /* ! * How many bytes in this symbol? ! */ ! count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err); ! if (count < 0) return (-1); ! mblen = count; ! wclen = 1; ! 
err = uconv_u8tou32((const uchar_t *)mbchar, &mblen, ! &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN); ! if (err != 0) return (-1); + if (wclen == 0) { + wide_char = 0; + count = 0; } if (wcharp) *wcharp = wide_char; return (count); }
*** 171,199 **** * The wctomb() function converts a wide character wchar into a multibyte * character and stores the result in mbchar. The object pointed to by * mbchar must be large enough to accommodate the multibyte character. * * Returns the number of bytes written to mbchar. */ int ! smb_wctomb(char *mbchar, smb_wchar_t wchar) { ! if ((wchar & ~0x7f) == 0) { ! *mbchar = (char)wchar; ! return (1); ! } ! if ((wchar & ~0x7ff) == 0) { ! *mbchar++ = (wchar >> 6) | 0xc0; ! *mbchar = (wchar & 0x3f) | 0x80; ! return (2); ! } ! *mbchar++ = (wchar >> 12) | 0xe0; ! *mbchar++ = ((wchar >> 6) & 0x3f) | 0x80; ! *mbchar = (wchar & 0x3f) | 0x80; ! return (3); } /* * wcstombs --- 151,181 ---- * The wctomb() function converts a wide character wchar into a multibyte * character and stores the result in mbchar. The object pointed to by * mbchar must be large enough to accommodate the multibyte character. * * Returns the number of bytes written to mbchar. + * Note: handles null like any 1-byte char. */ int ! smb_wctomb(char *mbchar, uint32_t wchar) { ! char junk[MTS_MB_CUR_MAX+1]; ! size_t mblen; ! size_t wclen; ! int err; ! if (mbchar == NULL) ! mbchar = junk; ! mblen = MTS_MB_CUR_MAX; ! wclen = 1; ! err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen, ! UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL); ! if (err != 0) ! return (-1); ! ! return ((int)mblen); } /* * wcstombs
*** 203,252 **** * stored in mbstring. Partial multibyte characters at the end of the * string are not stored. The multibyte character string is null * terminated if there is room. * * Returns the number of bytes converted, not counting the terminating ! * null byte. */ size_t ! smb_wcstombs(char *mbstring, const smb_wchar_t *wcstring, size_t nbytes) { ! char *start = mbstring; ! const smb_wchar_t *wcp = wcstring; ! smb_wchar_t wide_char = 0; ! char buf[4]; ! size_t len; ! if ((mbstring == NULL) || (wcstring == NULL)) return (0); - - while (nbytes > MTS_MB_CHAR_MAX) { - wide_char = *wcp++; - len = smb_wctomb(mbstring, wide_char); - - if (wide_char == 0) - /*LINTED E_PTRDIFF_OVERFLOW*/ - return (mbstring - start); - - mbstring += len; - nbytes -= len; } ! while (wide_char && nbytes) { ! wide_char = *wcp++; ! if ((len = smb_wctomb(buf, wide_char)) > nbytes) { ! *mbstring = 0; ! break; } ! bcopy(buf, mbstring, len); ! mbstring += len; ! nbytes -= len; ! } ! /*LINTED E_PTRDIFF_OVERFLOW*/ ! return (mbstring - start); } /* * Returns the number of bytes that would be written if the multi- --- 185,234 ---- * stored in mbstring. Partial multibyte characters at the end of the * string are not stored. The multibyte character string is null * terminated if there is room. * * Returns the number of bytes converted, not counting the terminating ! * null byte. Returns -1 if an invalid WC sequence is encountered. */ size_t ! smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes) { ! size_t mbslen, wcslen; ! int err; ! /* NULL or empty input is allowed. */ ! if (wcs == NULL || *wcs == 0) { ! if (mbs != NULL && nbytes > 0) ! *mbs = '\0'; return (0); } ! /* ! * Traditional wcstombs(3C) allows mbs==NULL to get the length. ! * SMB never calls it that way, but let's future-proof. ! */ ! if (mbs == NULL) { ! return ((size_t)-1); } ! /* ! * Compute wcslen ! */ ! wcslen = 0; ! while (wcs[wcslen] != 0) ! wcslen++; ! mbslen = nbytes; ! err = uconv_u16tou8(wcs, &wcslen, ! 
(uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN); ! if (err != 0) ! return ((size_t)-1); ! ! if (mbslen < nbytes) ! mbs[mbslen] = '\0'; ! ! return (mbslen); } /* * Returns the number of bytes that would be written if the multi-
*** 254,406 **** * counting the terminating null wide character. */ size_t smb_wcequiv_strlen(const char *mbs) { ! smb_wchar_t wide_char; size_t bytes; size_t len = 0; while (*mbs) { bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX); if (bytes == ((size_t)-1)) return ((size_t)-1); len += sizeof (smb_wchar_t); ! mbs += bytes; } return (len); } /* * Returns the number of bytes that would be written if the multi- ! * byte string mbs was converted to a single byte character string, ! * not counting the terminating null character. */ size_t smb_sbequiv_strlen(const char *mbs) { - smb_wchar_t wide_char; size_t nbytes; size_t len = 0; while (*mbs) { ! nbytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX); if (nbytes == ((size_t)-1)) return ((size_t)-1); ! if (wide_char & 0xFF00) ! len += sizeof (smb_wchar_t); ! else ! ++len; mbs += nbytes; } return (len); } /* * stombs * ! * Convert a regular null terminated string 'string' to a UTF-8 encoded ! * null terminated multi-byte string 'mbstring'. Only full converted ! * UTF-8 characters will be written 'mbstring'. If a character will not ! * fit within the remaining buffer space or 'mbstring' will overflow ! * max_mblen, the conversion process will be terminated and 'mbstring' ! * will be null terminated. * ! * Returns the number of bytes written to 'mbstring', excluding the ! * terminating null character. * * If either mbstring or string is a null pointer, -1 is returned. */ int ! smb_stombs(char *mbstring, char *string, int max_mblen) { ! char *start = mbstring; ! unsigned char *p = (unsigned char *)string; ! int space_left = max_mblen; ! int len; ! smb_wchar_t wide_char; ! char buf[4]; ! if (!mbstring || !string) return (-1); ! while (*p && space_left > 2) { ! wide_char = *p++; ! len = smb_wctomb(mbstring, wide_char); ! mbstring += len; ! space_left -= len; } ! if (*p) { ! wide_char = *p; ! if ((len = smb_wctomb(buf, wide_char)) < 2) { ! *mbstring = *buf; ! mbstring += len; ! space_left -= len; } ! } ! 
*mbstring = '\0'; ! ! /*LINTED E_PTRDIFF_OVERFLOW*/ ! return (mbstring - start); } /* * mbstos * ! * Convert a null terminated multi-byte string 'mbstring' to a regular ! * null terminated string 'string'. A 1-byte character in 'mbstring' ! * maps to a 1-byte character in 'string'. A 2-byte character in ! * 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null. ! * Otherwise the upper byte null will be discarded to ensure that the ! * output stream does not contain embedded null characters. * ! * If the input stream contains invalid multi-byte characters, a value ! * of -1 will be returned. Otherwise the length of 'string', excluding ! * the terminating null character, is returned. * * If either mbstring or string is a null pointer, -1 is returned. */ int ! smb_mbstos(char *string, const char *mbstring) { ! smb_wchar_t wc; ! unsigned char *start = (unsigned char *)string; ! int len; ! if (string == NULL || mbstring == NULL) return (-1); ! while (*mbstring) { ! if ((len = smb_mbtowc(&wc, mbstring, MTS_MB_CHAR_MAX)) < 0) { ! *string = 0; ! return (-1); } ! if (wc & 0xFF00) { ! /*LINTED E_BAD_PTR_CAST_ALIGN*/ ! *((smb_wchar_t *)string) = wc; ! string += sizeof (smb_wchar_t); ! } ! else ! { ! *string = (unsigned char)wc; ! string++; ! } ! mbstring += len; } ! *string = 0; ! ! /*LINTED E_PTRDIFF_OVERFLOW*/ ! return ((unsigned char *)string - start); } --- 236,476 ---- * counting the terminating null wide character. */ size_t smb_wcequiv_strlen(const char *mbs) { ! uint32_t wide_char; size_t bytes; size_t len = 0; while (*mbs) { bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX); if (bytes == ((size_t)-1)) return ((size_t)-1); + mbs += bytes; len += sizeof (smb_wchar_t); ! if (bytes > 3) { ! /* ! * Extended unicode, so TWO smb_wchar_t ! */ ! len += sizeof (smb_wchar_t); } + } return (len); } /* * Returns the number of bytes that would be written if the multi- ! * byte string mbs was converted to an OEM character string, ! 
* (smb_mbstooem) not counting the terminating null character. */ size_t smb_sbequiv_strlen(const char *mbs) { size_t nbytes; size_t len = 0; while (*mbs) { ! nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX); if (nbytes == ((size_t)-1)) return ((size_t)-1); + if (nbytes == 0) + break; ! if (nbytes == 1) { ! /* ASCII */ ! len++; ! } else if (nbytes < 8) { ! /* Compute OEM length */ ! char mbsbuf[8]; ! uint8_t oembuf[8]; ! int oemlen; ! (void) strlcpy(mbsbuf, mbs, nbytes+1); ! oemlen = smb_mbstooem(oembuf, mbsbuf, 8); ! if (oemlen < 0) ! return ((size_t)-1); ! len += oemlen; ! } else { ! return ((size_t)-1); ! } mbs += nbytes; } return (len); } + /* + * Convert OEM strings to/from internal (UTF-8) form. + * + * We rarely encounter these anymore because all modern + * SMB clients use Unicode (UTF-16). The few cases where + * this IS still called are normally using ASCII, i.e. + * tag names etc. so short-cut those cases. If we get + * something non-ASCII we have to call iconv. + * + * If we were to really support OEM code pages, we would + * need to have a way to set the OEM code page from some + * configuration value. For now it's always CP850. + * See also ./smb_oem.c + */ + static char smb_oem_codepage[32] = "CP850"; /* * stombs * ! * Convert a null terminated OEM string 'string' to a UTF-8 string ! * no longer than max_mblen (null terminated if space). * ! * If the input string contains invalid OEM characters, a value ! * of -1 will be returned. Otherwise returns the length of 'mbs', ! * excluding the terminating null character. * * If either mbstring or string is a null pointer, -1 is returned. */ int ! smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen) { ! uchar_t *p; ! int oemlen; ! int rlen; ! boolean_t need_iconv = B_FALSE; ! if (mbs == NULL || oems == NULL) return (-1); ! /* ! * Check if the oems is all ASCII (and get the length ! * while we're at it) so we know if we need to iconv. ! * We usually can avoid the iconv calls. ! */ ! oemlen = 0; ! 
p = (uchar_t *)oems; ! while (*p != '\0') { ! oemlen++; ! if (*p & 0x80) ! need_iconv = B_TRUE; ! p++; } ! if (need_iconv) { ! int rc; ! char *obuf = mbs; ! size_t olen = max_mblen; ! size_t ilen = oemlen; ! #if defined(_KERNEL) || defined(_FAKE_KERNEL) ! char *ibuf = (char *)oems; ! kiconv_t ic; ! int err; ! ! ic = kiconv_open("UTF-8", smb_oem_codepage); ! if (ic == (kiconv_t)-1) ! goto just_copy; ! rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err); ! (void) kiconv_close(ic); ! #else /* _KERNEL || _FAKE_KERNEL */ ! const char *ibuf = (char *)oems; ! iconv_t ic; ! ic = iconv_open("UTF-8", smb_oem_codepage); ! if (ic == (iconv_t)-1) ! goto just_copy; ! rc = iconv(ic, &ibuf, &ilen, &obuf, &olen); ! (void) iconv_close(ic); ! #endif /* _KERNEL || _FAKE_KERNEL */ ! if (rc < 0) ! return (-1); ! /* Return val. is output bytes. */ ! rlen = (max_mblen - olen); ! } else { ! just_copy: ! rlen = oemlen; ! if (rlen > max_mblen) ! rlen = max_mblen; ! bcopy(oems, mbs, rlen); } ! if (rlen < max_mblen) ! mbs[rlen] = '\0'; ! return (rlen); } /* * mbstos * ! * Convert a null terminated multi-byte string 'mbs' to an OEM string ! * no longer than max_oemlen (null terminated if space). * ! * If the input string contains invalid multi-byte characters, a value ! * of -1 will be returned. Otherwise returns the length of 'oems', ! * excluding the terminating null character. * * If either mbstring or string is a null pointer, -1 is returned. */ int ! smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen) { ! uchar_t *p; ! int mbslen; ! int rlen; ! boolean_t need_iconv = B_FALSE; ! if (oems == NULL || mbs == NULL) return (-1); ! /* ! * Check if the mbs is all ASCII (and get the length ! * while we're at it) so we know if we need to iconv. ! * We usually can avoid the iconv calls. ! */ ! mbslen = 0; ! p = (uchar_t *)mbs; ! while (*p != '\0') { ! mbslen++; ! if (*p & 0x80) ! need_iconv = B_TRUE; ! p++; } ! if (need_iconv) { ! int rc; ! char *obuf = (char *)oems; ! 
size_t olen = max_oemlen; ! size_t ilen = mbslen; ! #if defined(_KERNEL) || defined(_FAKE_KERNEL) ! char *ibuf = (char *)mbs; ! kiconv_t ic; ! int err; ! ic = kiconv_open(smb_oem_codepage, "UTF-8"); ! if (ic == (kiconv_t)-1) ! goto just_copy; ! rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err); ! (void) kiconv_close(ic); ! #else /* _KERNEL || _FAKE_KERNEL */ ! const char *ibuf = mbs; ! iconv_t ic; ! ic = iconv_open(smb_oem_codepage, "UTF-8"); ! if (ic == (iconv_t)-1) ! goto just_copy; ! rc = iconv(ic, &ibuf, &ilen, &obuf, &olen); ! (void) iconv_close(ic); ! #endif /* _KERNEL || _FAKE_KERNEL */ ! if (rc < 0) ! return (-1); ! /* Return val. is output bytes. */ ! rlen = (max_oemlen - olen); ! } else { ! just_copy: ! rlen = mbslen; ! if (rlen > max_oemlen) ! rlen = max_oemlen; ! bcopy(mbs, oems, rlen); } + if (rlen < max_oemlen) + oems[rlen] = '\0'; ! return (rlen); }