1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 /*
  29  * Support for oem <-> unicode translations.
  30  */
  31 
  32 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
  33 #include <stdlib.h>
  34 #include <thread.h>
  35 #include <synch.h>
  36 #include <string.h>
  37 #else
  38 #include <sys/ksynch.h>
  39 #endif /* _KERNEL */
  40 
  41 #include <sys/byteorder.h>
  42 #include <smbsrv/alloc.h>
  43 #include <smbsrv/string.h>
  44 
/*
 * One direction of a codepage translation table.
 *
 * cpid         The oemcpg_table index for this oempage.
 * value        The conversion values.  NULL until oem_codepage_setup()
 *              allocates and fills the table.  For the oempage member
 *              of an oem_codepage_t the table is indexed by OEM
 *              character and holds Unicode characters; for the ucspage
 *              member it is indexed by Unicode character and holds OEM
 *              characters.
 */
typedef struct oempage {
        uint32_t        cpid;
        smb_wchar_t     *value;
} oempage_t;
  53 
/*
 * State kept for each codepage listed in oemcpg_table.
 *
 * filename     Name of the file associated with the codepage (for
 *              example "850.cpg").  oem_codepage_setup() does not
 *              reference it; the tables are built from the built-in
 *              default arrays.
 * bytesperchar The codepage uses double or single bytes per char.
 *              This determines how many entries are allocated for the
 *              oempage table (256 or 65536).
 * oempage      The oempage is used to convert OEM characters to
 *              Unicode characters.  Memory needs to be allocated for
 *              the value field of oempage to store the table.
 * ucspage      The unicode page is used to convert Unicode characters
 *              to OEM characters.  Memory needs to be allocated for
 *              the value field of ucspage to store the table.
 * valid        True if the codepage has been initialized, i.e. both
 *              tables have been allocated and filled in.
 */
typedef struct oem_codepage {
        char            *filename;
        uint32_t        bytesperchar;
        oempage_t       oempage;
        oempage_t       ucspage;
        boolean_t       valid;
} oem_codepage_t;
  72 
/*
 * Table of known OEM codepages.  A codepage id (cpid) is used directly
 * as an index into this table, and the first member of each oempage and
 * ucspage initializer repeats the row's own index.  All rows start out
 * with NULL tables and valid == 0.
 *
 * NOTE(review): the row order presumably has to match the OEM_CPG_*
 * constants, which are defined elsewhere -- confirm before reordering
 * or inserting rows.
 */
static oem_codepage_t oemcpg_table[] = {
        {"850.cpg",  1, {0, 0},  {0, 0},  0},   /* Multilingual Latin1 */
        {"950.cpg",  2, {1, 0},  {1, 0},  0},   /* Chinese Traditional */
        {"1252.cpg", 1, {2, 0},  {2, 0},  0},   /* MS Latin1 */
        {"949.cpg",  2, {3, 0},  {3, 0},  0},   /* Korean */
        {"936.cpg",  2, {4, 0},  {4, 0},  0},   /* Chinese Simplified */
        {"932.cpg",  2, {5, 0},  {5, 0},  0},   /* Japanese */
        {"852.cpg",  1, {6, 0},  {6, 0},  0},   /* Multilingual Latin2 */
        {"1250.cpg", 1, {7, 0},  {7, 0},  0},   /* MS Latin2 */
        {"1253.cpg", 1, {8, 0},  {8, 0},  0},   /* MS Greek */
        {"737.cpg",  1, {9, 0},  {9, 0},  0},   /* Greek */
        {"1254.cpg", 1, {10, 0}, {10, 0}, 0},   /* MS Turkish */
        {"857.cpg",  1, {11, 0}, {11, 0}, 0},   /* Multilingual Latin5 */
        {"1251.cpg", 1, {12, 0}, {12, 0}, 0},   /* MS Cyrillic */
        {"866.cpg",  1, {13, 0}, {13, 0}, 0},   /* Cyrillic II */
        {"1255.cpg", 1, {14, 0}, {14, 0}, 0},   /* MS Hebrew */
        {"862.cpg",  1, {15, 0}, {15, 0}, 0},   /* Hebrew */
        {"1256.cpg", 1, {16, 0}, {16, 0}, 0},   /* MS Arabic */
        {"720.cpg",  1, {17, 0}, {17, 0}, 0}    /* Arabic */
};
  93 
/*
 * MAX_OEMPAGES         Number of rows in oemcpg_table; codepage ids at
 *                      or above this value are rejected.
 * MAX_UNICODE_IDX      Number of smb_wchar_t entries allocated for each
 *                      ucspage table.
 */
#define MAX_OEMPAGES    (sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))
#define MAX_UNICODE_IDX 65536
  96 
/*
 * The default SMB OEM codepage for English is codepage 850.
 *
 * The array is indexed by the OEM byte value and each entry is the
 * corresponding Unicode character.  Entries 0x00-0x7F map to
 * themselves; the upper half carries the codepage-specific characters.
 */
const smb_wchar_t oem_codepage_850[256] = {
        0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
        0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
        0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
        0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
        0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
        0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
        0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
        0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
        0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
        0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
        0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
        0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
        0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
        0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
        0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
        0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
        0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
        0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
        0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
        0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
        0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
        0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
        0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
        0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
        0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
        0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
        0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
        0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
        0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
};
 134 
 135 /*
 136  * The default telnet OEM codepage for English is codepage 1252.
 137  */
 138 const smb_wchar_t oem_codepage_1252[256] = {
 139         0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
 140         0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
 141         0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
 142         0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
 143         0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 144         0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
 145         0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
 146         0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
 147         0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
 148         0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
 149         0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
 150         0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
 151         0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
 152         0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
 153         0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 154         0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
 155         0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
 156         0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
 157         0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
 158         0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
 159         0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
 160         0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
 161         0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
 162         0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
 163         0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
 164         0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
 165         0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
 166         0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
 167         0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
 168         0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
 169         0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
 170         0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
 171 };
 172 
 173 static oempage_t *oem_get_oempage(uint32_t);
 174 static oempage_t *oem_get_ucspage(uint32_t);
 175 static void oem_codepage_init(uint32_t);
 176 static void oem_codepage_setup(uint32_t);
 177 
 178 /*
 179  * Convert a unicode string to an oem string.
 180  *
 181  * The conversion will stop at the end of the unicode string
 182  * or when (nbytes - 1) oem characters have been stored.
 183  *
 184  * The number of converted unicode characters is returned,
 185  * or 0 on error.
 186  */
 187 size_t
 188 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
 189 {
 190         oempage_t       *ucspage;
 191         uint32_t        count = 0;
 192         smb_wchar_t     oemchar;
 193 
 194         if (ucs == NULL || oem == NULL)
 195                 return (0);
 196 
 197         if ((ucspage = oem_get_ucspage(cpid)) == NULL)
 198                 return (0);
 199 
 200         while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
 201                 if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
 202                         *oem++ = oemchar >> 8;
 203                         *oem++ = (char)oemchar;
 204                         nbytes -= 2;
 205                 } else if (nbytes > 1) {
 206                         *oem++ = (char)oemchar;
 207                         nbytes--;
 208                 } else {
 209                         break;
 210                 }
 211 
 212                 count++;
 213                 ucs++;
 214         }
 215 
 216         *oem = '\0';
 217         return (count);
 218 }
 219 
 220 /*
 221  * Convert an oem string to a unicode string.
 222  *
 223  * The conversion will stop at the end of the oem string or
 224  * when nwchars - 1 have been converted.
 225  *
 226  * The number of converted oem chars is returned, or 0 on error.
 227  * An oem char may be either 1 or 2 bytes.
 228  */
 229 size_t
 230 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
 231 {
 232         oempage_t       *oempage;
 233         size_t          count = nwchars;
 234         smb_wchar_t     oemchar;
 235 
 236         if (ucs == NULL || oem == NULL)
 237                 return (0);
 238 
 239         if ((oempage = oem_get_oempage(cpid)) == NULL)
 240                 return (0);
 241 
 242         while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
 243                 /*
 244                  * Cannot find one byte oemchar in table.
 245                  * Must be a lead byte. Try two bytes.
 246                  */
 247                 if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
 248                         oemchar = oemchar << 8 | (*oem++ & 0xff);
 249                         if (oempage->value[oemchar] == 0) {
 250                                 *ucs = 0;
 251                                 break;
 252                         }
 253                 }
 254 #ifdef _BIG_ENDIAN
 255                 *ucs = LE_IN16(&oempage->value[oemchar]);
 256 #else
 257                 *ucs = oempage->value[oemchar];
 258 #endif
 259                 count--;
 260                 ucs++;
 261         }
 262 
 263         *ucs = 0;
 264         return (nwchars - count);
 265 }
 266 
 267 /*
 268  * Get a pointer to the oem page for the specific codepage id.
 269  */
 270 static oempage_t *
 271 oem_get_oempage(uint32_t cpid)
 272 {
 273         if (cpid >= MAX_OEMPAGES)
 274                 return (NULL);
 275 
 276         if (!oemcpg_table[cpid].valid) {
 277                 oem_codepage_init(cpid);
 278 
 279                 if (!oemcpg_table[cpid].valid)
 280                         return (NULL);
 281         }
 282 
 283         return (&oemcpg_table[cpid].oempage);
 284 }
 285 
 286 /*
 287  * Get a pointer to the ucs page for the specific codepage id.
 288  */
 289 static oempage_t *
 290 oem_get_ucspage(uint32_t cpid)
 291 {
 292         if (cpid >= MAX_OEMPAGES)
 293                 return (NULL);
 294 
 295         if (!oemcpg_table[cpid].valid) {
 296                 oem_codepage_init(cpid);
 297 
 298                 if (!oemcpg_table[cpid].valid)
 299                         return (NULL);
 300         }
 301 
 302         return (&oemcpg_table[cpid].ucspage);
 303 }
 304 
 305 /*
 306  * Initialize the oem page in the oem table.
 307  */
 308 static void
 309 oem_codepage_init(uint32_t cpid)
 310 {
 311 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
 312         static mutex_t mutex;
 313 
 314         (void) mutex_lock(&mutex);
 315         oem_codepage_setup(cpid);
 316         (void) mutex_unlock(&mutex);
 317 #else
 318         static kmutex_t mutex;
 319 
 320         mutex_enter(&mutex);
 321         oem_codepage_setup(cpid);
 322         mutex_exit(&mutex);
 323 #endif /* _KERNEL */
 324 }
 325 
 326 static void
 327 oem_codepage_setup(uint32_t cpid)
 328 {
 329         const smb_wchar_t *default_oem_cp;
 330         oem_codepage_t  *oemcpg;
 331         uint32_t        bytesperchar;
 332         uint32_t        max_oem_index;
 333         int             i;
 334 
 335         switch (cpid) {
 336         case OEM_CPG_850:
 337                 default_oem_cp = oem_codepage_850;
 338                 break;
 339         case OEM_CPG_1252:
 340                 default_oem_cp = oem_codepage_1252;
 341         default:
 342                 return;
 343         }
 344 
 345         oemcpg = &oemcpg_table[cpid];
 346         if (oemcpg->valid)
 347                 return;
 348 
 349         /*
 350          * max_oem_index will be 256 or 65536 dependent
 351          * on the OEM codepage.
 352          */
 353         bytesperchar = oemcpg_table[cpid].bytesperchar;
 354         max_oem_index = 1 << (bytesperchar * 8);
 355 
 356         oemcpg->oempage.value =
 357             MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
 358         if (oemcpg->oempage.value == NULL)
 359                 return;
 360 
 361         oemcpg->ucspage.value =
 362             MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
 363         if (oemcpg->ucspage.value == NULL) {
 364                 MEM_FREE("oem", oemcpg->oempage.value);
 365                 oemcpg->oempage.value = NULL;
 366                 return;
 367         }
 368 
 369         for (i = 0; i < max_oem_index; i++) {
 370                 oemcpg->oempage.value[i] = default_oem_cp[i];
 371                 oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
 372         }
 373 
 374         oemcpg->valid = B_TRUE;
 375 }