1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 /*
  29  * Support for oem <-> unicode translations.
  30  */
  31 
  32 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
  33 #include <stdlib.h>
  34 #include <thread.h>
  35 #include <synch.h>
  36 #include <string.h>
  37 #else
  38 #include <sys/types.h>
  39 #include <sys/ksynch.h>
  40 #include <sys/sunddi.h>
  41 #endif /* _KERNEL */
  42 
  43 #include <sys/byteorder.h>
  44 #include <smbsrv/alloc.h>
  45 #include <smbsrv/string.h>
  46 
/*
 * One direction of a codepage translation: a lookup table together
 * with the index of the codepage it belongs to.
 *
 * cpid         The oemcpg_table index for this oempage.
 * value        The conversion values.  NULL until oem_codepage_setup()
 *              allocates and fills the table.
 */
typedef struct oempage {
        uint32_t        cpid;
        smb_wchar_t     *value;
} oempage_t;
  55 
/*
 * Per-codepage state; one entry of oemcpg_table.
 *
 * filename     Name of the file associated with the codepage, for
 *              example "850.cpg".
 * bytesperchar The codepage uses double (2) or single (1) bytes per
 *              char.  oem_codepage_setup() sizes the oempage table
 *              from this value.
 * oempage      Table indexed by OEM character that yields the Unicode
 *              character; this is the table read by oemtoucs().
 *              Memory needs to be allocated for the value field of
 *              oempage to store the table.
 * ucspage      Table indexed by Unicode character that yields the OEM
 *              character; this is the table read by ucstooem().
 *              Memory needs to be allocated for the value field of
 *              ucspage to store the table.
 * valid        True once oem_codepage_setup() has allocated and filled
 *              both tables.
 */
typedef struct oem_codepage {
        char            *filename;
        uint32_t        bytesperchar;
        oempage_t       oempage;
        oempage_t       ucspage;
        boolean_t       valid;
} oem_codepage_t;
  74 
/*
 * Table of known codepages.  An entry's position in the array is its
 * codepage id (cpid); the same index is stored in the cpid member of
 * both the oempage and the ucspage.  Every entry starts out invalid
 * with NULL conversion tables; an entry only becomes valid when
 * oem_codepage_setup() succeeds for its cpid.
 *
 * Columns: filename, bytesperchar, oempage {cpid, value},
 * ucspage {cpid, value}, valid.
 */
static oem_codepage_t oemcpg_table[] = {
        {"850.cpg",  1, {0, 0},  {0, 0},  0},   /* Multilingual Latin1 */
        {"950.cpg",  2, {1, 0},  {1, 0},  0},   /* Chinese Traditional */
        {"1252.cpg", 1, {2, 0},  {2, 0},  0},   /* MS Latin1 */
        {"949.cpg",  2, {3, 0},  {3, 0},  0},   /* Korean */
        {"936.cpg",  2, {4, 0},  {4, 0},  0},   /* Chinese Simplified */
        {"932.cpg",  2, {5, 0},  {5, 0},  0},   /* Japanese */
        {"852.cpg",  1, {6, 0},  {6, 0},  0},   /* Multilingual Latin2 */
        {"1250.cpg", 1, {7, 0},  {7, 0},  0},   /* MS Latin2 */
        {"1253.cpg", 1, {8, 0},  {8, 0},  0},   /* MS Greek */
        {"737.cpg",  1, {9, 0},  {9, 0},  0},   /* Greek */
        {"1254.cpg", 1, {10, 0}, {10, 0}, 0},   /* MS Turkish */
        {"857.cpg",  1, {11, 0}, {11, 0}, 0},   /* Multilingual Latin5 */
        {"1251.cpg", 1, {12, 0}, {12, 0}, 0},   /* MS Cyrillic */
        {"866.cpg",  1, {13, 0}, {13, 0}, 0},   /* Cyrillic II */
        {"1255.cpg", 1, {14, 0}, {14, 0}, 0},   /* MS Hebrew */
        {"862.cpg",  1, {15, 0}, {15, 0}, 0},   /* Hebrew */
        {"1256.cpg", 1, {16, 0}, {16, 0}, 0},   /* MS Arabic */
        {"720.cpg",  1, {17, 0}, {17, 0}, 0}    /* Arabic */
};
  95 
/* Number of entries in oemcpg_table; valid cpid values are below this. */
#define MAX_OEMPAGES    (sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))
/* Number of entries allocated for each Unicode-indexed (ucspage) table. */
#define MAX_UNICODE_IDX 65536
  98 
/*
 * The default SMB OEM codepage for English is codepage 850.
 *
 * The table is indexed by OEM byte value and each entry is the
 * corresponding Unicode character.  The first 128 entries map each
 * value to itself.
 */
const smb_wchar_t oem_codepage_850[256] = {
        0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
        0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
        0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
        0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
        0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
        0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
        0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
        0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
        0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
        0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
        0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
        0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
        0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
        0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
        0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
        0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
        0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
        0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
        0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
        0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
        0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
        0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
        0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
        0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
        0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
        0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
        0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
        0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
        0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
};
 136 
/*
 * The default telnet OEM codepage for English is codepage 1252.
 *
 * The table is indexed by OEM byte value and each entry is the
 * corresponding Unicode character.  Note the row layout: the first
 * row holds nine entries (0x00-0x08), so every later row starts at an
 * index ending in 1 or 9 and the last row holds seven entries.
 * Entries 0x81, 0x8D, 0x8F, 0x90 and 0x9D map to themselves.
 */
const smb_wchar_t oem_codepage_1252[256] = {
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
        0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
        0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
        0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
        0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
        0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
        0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
        0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
        0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
        0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
        0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
        0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
        0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
        0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
        0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
        0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
        0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
        0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
        0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
        0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
        0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
        0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
        0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
        0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
        0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
        0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
        0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
        0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
        0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
        0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
        0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
 174 
/*
 * Forward declarations for the file-local helpers defined below.
 * The two getters return NULL when the codepage id is out of range
 * or the codepage could not be initialized.
 */
static oempage_t *oem_get_oempage(uint32_t);
static oempage_t *oem_get_ucspage(uint32_t);
static void oem_codepage_init(uint32_t);
static void oem_codepage_setup(uint32_t);
 179 
 180 /*
 181  * Convert a unicode string to an oem string.
 182  *
 183  * The conversion will stop at the end of the unicode string
 184  * or when (nbytes - 1) oem characters have been stored.
 185  *
 186  * The number of converted unicode characters is returned,
 187  * or 0 on error.
 188  */
 189 size_t
 190 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
 191 {
 192         oempage_t       *ucspage;
 193         uint32_t        count = 0;
 194         smb_wchar_t     oemchar;
 195 
 196         if (ucs == NULL || oem == NULL)
 197                 return (0);
 198 
 199         if ((ucspage = oem_get_ucspage(cpid)) == NULL)
 200                 return (0);
 201 
 202         while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
 203                 if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
 204                         *oem++ = oemchar >> 8;
 205                         *oem++ = (char)oemchar;
 206                         nbytes -= 2;
 207                 } else if (nbytes > 1) {
 208                         *oem++ = (char)oemchar;
 209                         nbytes--;
 210                 } else {
 211                         break;
 212                 }
 213 
 214                 count++;
 215                 ucs++;
 216         }
 217 
 218         *oem = '\0';
 219         return (count);
 220 }
 221 
 222 /*
 223  * Convert an oem string to a unicode string.
 224  *
 225  * The conversion will stop at the end of the oem string or
 226  * when nwchars - 1 have been converted.
 227  *
 228  * The number of converted oem chars is returned, or 0 on error.
 229  * An oem char may be either 1 or 2 bytes.
 230  */
 231 size_t
 232 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
 233 {
 234         oempage_t       *oempage;
 235         size_t          count = nwchars;
 236         smb_wchar_t     oemchar;
 237 
 238         if (ucs == NULL || oem == NULL)
 239                 return (0);
 240 
 241         if ((oempage = oem_get_oempage(cpid)) == NULL)
 242                 return (0);
 243 
 244         while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
 245                 /*
 246                  * Cannot find one byte oemchar in table.
 247                  * Must be a lead byte. Try two bytes.
 248                  */
 249                 if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
 250                         oemchar = oemchar << 8 | (*oem++ & 0xff);
 251                         if (oempage->value[oemchar] == 0) {
 252                                 *ucs = 0;
 253                                 break;
 254                         }
 255                 }
 256 #ifdef _BIG_ENDIAN
 257                 *ucs = LE_IN16(&oempage->value[oemchar]);
 258 #else
 259                 *ucs = oempage->value[oemchar];
 260 #endif
 261                 count--;
 262                 ucs++;
 263         }
 264 
 265         *ucs = 0;
 266         return (nwchars - count);
 267 }
 268 
 269 /*
 270  * Get a pointer to the oem page for the specific codepage id.
 271  */
 272 static oempage_t *
 273 oem_get_oempage(uint32_t cpid)
 274 {
 275         if (cpid >= MAX_OEMPAGES)
 276                 return (NULL);
 277 
 278         if (!oemcpg_table[cpid].valid) {
 279                 oem_codepage_init(cpid);
 280 
 281                 if (!oemcpg_table[cpid].valid)
 282                         return (NULL);
 283         }
 284 
 285         return (&oemcpg_table[cpid].oempage);
 286 }
 287 
 288 /*
 289  * Get a pointer to the ucs page for the specific codepage id.
 290  */
 291 static oempage_t *
 292 oem_get_ucspage(uint32_t cpid)
 293 {
 294         if (cpid >= MAX_OEMPAGES)
 295                 return (NULL);
 296 
 297         if (!oemcpg_table[cpid].valid) {
 298                 oem_codepage_init(cpid);
 299 
 300                 if (!oemcpg_table[cpid].valid)
 301                         return (NULL);
 302         }
 303 
 304         return (&oemcpg_table[cpid].ucspage);
 305 }
 306 
 307 /*
 308  * Initialize the oem page in the oem table.
 309  */
 310 static void
 311 oem_codepage_init(uint32_t cpid)
 312 {
 313 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
 314         static mutex_t mutex;
 315 
 316         (void) mutex_lock(&mutex);
 317         oem_codepage_setup(cpid);
 318         (void) mutex_unlock(&mutex);
 319 #else
 320         static kmutex_t mutex;
 321 
 322         mutex_enter(&mutex);
 323         oem_codepage_setup(cpid);
 324         mutex_exit(&mutex);
 325 #endif /* _KERNEL */
 326 }
 327 
 328 static void
 329 oem_codepage_setup(uint32_t cpid)
 330 {
 331         const smb_wchar_t *default_oem_cp;
 332         oem_codepage_t  *oemcpg;
 333         uint32_t        bytesperchar;
 334         uint32_t        max_oem_index;
 335         int             i;
 336 
 337         switch (cpid) {
 338         case OEM_CPG_850:
 339                 default_oem_cp = oem_codepage_850;
 340                 break;
 341         case OEM_CPG_1252:
 342                 default_oem_cp = oem_codepage_1252;
 343         default:
 344                 return;
 345         }
 346 
 347         oemcpg = &oemcpg_table[cpid];
 348         if (oemcpg->valid)
 349                 return;
 350 
 351         /*
 352          * max_oem_index will be 256 or 65536 dependent
 353          * on the OEM codepage.
 354          */
 355         bytesperchar = oemcpg_table[cpid].bytesperchar;
 356         max_oem_index = 1 << (bytesperchar * 8);
 357 
 358         oemcpg->oempage.value =
 359             MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
 360         if (oemcpg->oempage.value == NULL)
 361                 return;
 362 
 363         oemcpg->ucspage.value =
 364             MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
 365         if (oemcpg->ucspage.value == NULL) {
 366                 MEM_FREE("oem", oemcpg->oempage.value);
 367                 oemcpg->oempage.value = NULL;
 368                 return;
 369         }
 370 
 371         for (i = 0; i < max_oem_index; i++) {
 372                 oemcpg->oempage.value[i] = default_oem_cp[i];
 373                 oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
 374         }
 375 
 376         oemcpg->valid = B_TRUE;
 377 }