1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 /*
29 * Support for oem <-> unicode translations.
30 */
31
32 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
33 #include <stdlib.h>
34 #include <thread.h>
35 #include <synch.h>
36 #include <string.h>
37 #else
38 #include <sys/types.h>
39 #include <sys/ksynch.h>
40 #include <sys/sunddi.h>
41 #endif /* _KERNEL */
42
43 #include <sys/byteorder.h>
44 #include <smbsrv/alloc.h>
45 #include <smbsrv/string.h>
46
47 /*
48 * cpid The oemcpg_table index for this oempage.
49 * value The conversion values.
50 */
51 typedef struct oempage {
52 uint32_t cpid;
53 smb_wchar_t *value;
54 } oempage_t;
55
56 /*
57 * filename The actual filename contains the codepage.
58 * bytesperchar The codepage uses double or single bytes per char.
59 * oempage The oempage is used to convert Unicode characters to
60 * OEM characters. Memory needs to be allocated for
61 * the value field of oempage to store the table.
62 * ucspage The unicode page is used to convert OEM characters
63 * to Unicode characters. Memory needs to be allocated
64 * for the value field of ucspage to store the table.
65 * valid True if the codepage has been initialized.
66 */
67 typedef struct oem_codepage {
68 char *filename;
69 uint32_t bytesperchar;
70 oempage_t oempage;
71 oempage_t ucspage;
72 boolean_t valid;
73 } oem_codepage_t;
74
75 static oem_codepage_t oemcpg_table[] = {
76 {"850.cpg", 1, {0, 0}, {0, 0}, 0}, /* Multilingual Latin1 */
77 {"950.cpg", 2, {1, 0}, {1, 0}, 0}, /* Chinese Traditional */
78 {"1252.cpg", 1, {2, 0}, {2, 0}, 0}, /* MS Latin1 */
79 {"949.cpg", 2, {3, 0}, {3, 0}, 0}, /* Korean */
80 {"936.cpg", 2, {4, 0}, {4, 0}, 0}, /* Chinese Simplified */
81 {"932.cpg", 2, {5, 0}, {5, 0}, 0}, /* Japanese */
82 {"852.cpg", 1, {6, 0}, {6, 0}, 0}, /* Multilingual Latin2 */
83 {"1250.cpg", 1, {7, 0}, {7, 0}, 0}, /* MS Latin2 */
84 {"1253.cpg", 1, {8, 0}, {8, 0}, 0}, /* MS Greek */
85 {"737.cpg", 1, {9, 0}, {9, 0}, 0}, /* Greek */
86 {"1254.cpg", 1, {10, 0}, {10, 0}, 0}, /* MS Turkish */
87 {"857.cpg", 1, {11, 0}, {11, 0}, 0}, /* Multilingual Latin5 */
88 {"1251.cpg", 1, {12, 0}, {12, 0}, 0}, /* MS Cyrillic */
89 {"866.cpg", 1, {13, 0}, {13, 0}, 0}, /* Cyrillic II */
90 {"1255.cpg", 1, {14, 0}, {14, 0}, 0}, /* MS Hebrew */
91 {"862.cpg", 1, {15, 0}, {15, 0}, 0}, /* Hebrew */
92 {"1256.cpg", 1, {16, 0}, {16, 0}, 0}, /* MS Arabic */
93 {"720.cpg", 1, {17, 0}, {17, 0}, 0} /* Arabic */
94 };
95
96 #define MAX_OEMPAGES (sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))
97 #define MAX_UNICODE_IDX 65536
98
99 /*
100 * The default SMB OEM codepage for English is codepage 850.
101 */
102 const smb_wchar_t oem_codepage_850[256] = {
103 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
104 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
105 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
106 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
107 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
108 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
109 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
110 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
111 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
112 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
113 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
114 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
115 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
116 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
117 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
118 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
119 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
120 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
121 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
122 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
123 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
124 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
125 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
126 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
127 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
128 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
129 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
130 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
131 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
132 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
133 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
134 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
135 };
136
137 /*
138 * The default telnet OEM codepage for English is codepage 1252.
139 */
140 const smb_wchar_t oem_codepage_1252[256] = {
141 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
142 0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
143 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
144 0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
145 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
146 0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
147 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
148 0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
149 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
150 0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
151 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
152 0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
153 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
154 0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
155 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
156 0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
157 0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
158 0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
159 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
160 0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
161 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
162 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
163 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
164 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
165 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
166 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
167 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
168 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
169 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
170 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
171 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
172 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
173 };
174
175 static oempage_t *oem_get_oempage(uint32_t);
176 static oempage_t *oem_get_ucspage(uint32_t);
177 static void oem_codepage_init(uint32_t);
178 static void oem_codepage_setup(uint32_t);
179
180 /*
181 * Convert a unicode string to an oem string.
182 *
183 * The conversion will stop at the end of the unicode string
184 * or when (nbytes - 1) oem characters have been stored.
185 *
186 * The number of converted unicode characters is returned,
187 * or 0 on error.
188 */
189 size_t
190 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
191 {
192 oempage_t *ucspage;
193 uint32_t count = 0;
194 smb_wchar_t oemchar;
195
196 if (ucs == NULL || oem == NULL)
197 return (0);
198
199 if ((ucspage = oem_get_ucspage(cpid)) == NULL)
200 return (0);
201
202 while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
203 if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
204 *oem++ = oemchar >> 8;
205 *oem++ = (char)oemchar;
206 nbytes -= 2;
207 } else if (nbytes > 1) {
208 *oem++ = (char)oemchar;
209 nbytes--;
210 } else {
211 break;
212 }
213
214 count++;
215 ucs++;
216 }
217
218 *oem = '\0';
219 return (count);
220 }
221
222 /*
223 * Convert an oem string to a unicode string.
224 *
225 * The conversion will stop at the end of the oem string or
226 * when nwchars - 1 have been converted.
227 *
228 * The number of converted oem chars is returned, or 0 on error.
229 * An oem char may be either 1 or 2 bytes.
230 */
231 size_t
232 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
233 {
234 oempage_t *oempage;
235 size_t count = nwchars;
236 smb_wchar_t oemchar;
237
238 if (ucs == NULL || oem == NULL)
239 return (0);
240
241 if ((oempage = oem_get_oempage(cpid)) == NULL)
242 return (0);
243
244 while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
245 /*
246 * Cannot find one byte oemchar in table.
247 * Must be a lead byte. Try two bytes.
248 */
249 if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
250 oemchar = oemchar << 8 | (*oem++ & 0xff);
251 if (oempage->value[oemchar] == 0) {
252 *ucs = 0;
253 break;
254 }
255 }
256 #ifdef _BIG_ENDIAN
257 *ucs = LE_IN16(&oempage->value[oemchar]);
258 #else
259 *ucs = oempage->value[oemchar];
260 #endif
261 count--;
262 ucs++;
263 }
264
265 *ucs = 0;
266 return (nwchars - count);
267 }
268
269 /*
270 * Get a pointer to the oem page for the specific codepage id.
271 */
272 static oempage_t *
273 oem_get_oempage(uint32_t cpid)
274 {
275 if (cpid >= MAX_OEMPAGES)
276 return (NULL);
277
278 if (!oemcpg_table[cpid].valid) {
279 oem_codepage_init(cpid);
280
281 if (!oemcpg_table[cpid].valid)
282 return (NULL);
283 }
284
285 return (&oemcpg_table[cpid].oempage);
286 }
287
288 /*
289 * Get a pointer to the ucs page for the specific codepage id.
290 */
291 static oempage_t *
292 oem_get_ucspage(uint32_t cpid)
293 {
294 if (cpid >= MAX_OEMPAGES)
295 return (NULL);
296
297 if (!oemcpg_table[cpid].valid) {
298 oem_codepage_init(cpid);
299
300 if (!oemcpg_table[cpid].valid)
301 return (NULL);
302 }
303
304 return (&oemcpg_table[cpid].ucspage);
305 }
306
307 /*
308 * Initialize the oem page in the oem table.
309 */
310 static void
311 oem_codepage_init(uint32_t cpid)
312 {
313 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
314 static mutex_t mutex;
315
316 (void) mutex_lock(&mutex);
317 oem_codepage_setup(cpid);
318 (void) mutex_unlock(&mutex);
319 #else
320 static kmutex_t mutex;
321
322 mutex_enter(&mutex);
323 oem_codepage_setup(cpid);
324 mutex_exit(&mutex);
325 #endif /* _KERNEL */
326 }
327
328 static void
329 oem_codepage_setup(uint32_t cpid)
330 {
331 const smb_wchar_t *default_oem_cp;
332 oem_codepage_t *oemcpg;
333 uint32_t bytesperchar;
334 uint32_t max_oem_index;
335 int i;
336
337 switch (cpid) {
338 case OEM_CPG_850:
339 default_oem_cp = oem_codepage_850;
340 break;
341 case OEM_CPG_1252:
342 default_oem_cp = oem_codepage_1252;
343 default:
344 return;
345 }
346
347 oemcpg = &oemcpg_table[cpid];
348 if (oemcpg->valid)
349 return;
350
351 /*
352 * max_oem_index will be 256 or 65536 dependent
353 * on the OEM codepage.
354 */
355 bytesperchar = oemcpg_table[cpid].bytesperchar;
356 max_oem_index = 1 << (bytesperchar * 8);
357
358 oemcpg->oempage.value =
359 MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
360 if (oemcpg->oempage.value == NULL)
361 return;
362
363 oemcpg->ucspage.value =
364 MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
365 if (oemcpg->ucspage.value == NULL) {
366 MEM_FREE("oem", oemcpg->oempage.value);
367 oemcpg->oempage.value = NULL;
368 return;
369 }
370
371 for (i = 0; i < max_oem_index; i++) {
372 oemcpg->oempage.value[i] = default_oem_cp[i];
373 oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
374 }
375
376 oemcpg->valid = B_TRUE;
377 }