root/src/vfs/smbfs/helpers/lib/charset.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. add_dos_char
  2. charset_initialise
  3. load_client_codepage
  4. codepage_initialise
  5. add_char_string

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Character set handling
   5 
   6    Copyright (C) Andrew Tridgell 1992-1998
   7 
   8    Copyright (C) 2011-2019
   9    Free Software Foundation, Inc.
  10 
  11    This file is part of the Midnight Commander.
  12 
  13    The Midnight Commander is free software: you can redistribute it
  14    and/or modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation, either version 3 of the License,
  16    or (at your option) any later version.
  17 
  18    The Midnight Commander is distributed in the hope that it will be useful,
  19    but WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21    GNU General Public License for more details.
  22 
  23    You should have received a copy of the GNU General Public License
  24    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  25  */
  26 
  27 #define CHARSET_C
  28 #include "includes.h"
  29 
  30 const char *unix_error_string (int error_num);
  31 extern int DEBUGLEVEL;
  32 
  33 /*
  34  * Codepage definitions.
  35  */
  36 
  37 #if !defined(KANJI)
  38 /* lower->upper mapping for IBM Code Page 850 - MS-DOS Latin 1.
      *
      * Each row is {lower, upper, map_lower_to_upper, map_upper_to_lower}: that is
      * the order in which codepage_initialise() passes the four columns to
      * add_dos_char().  The first row whose first two bytes are both zero ends
      * the table. */
  39 unsigned char const cp_850[][4] = {
  40     /* dec col/row oct hex  description */
  41     /* 133  08/05  205  85  a grave */
  42     /* 183  11/07  267  B7  A grave */ {0x85, 0xB7, 1, 1},
  43     /* 160  10/00  240  A0  a acute */
  44     /* 181  11/05  265  B5  A acute */ {0xA0, 0xB5, 1, 1},
  45     /* 131  08/03  203  83  a circumflex */
  46     /* 182  11/06  266  B6  A circumflex */ {0x83, 0xB6, 1, 1},
  47     /* 198  12/06  306  C6  a tilde */
  48     /* 199  12/07  307  C7  A tilde */ {0xC6, 0xC7, 1, 1},
  49     /* 132  08/04  204  84  a diaeresis */
  50     /* 142  08/14  216  8E  A diaeresis */ {0x84, 0x8E, 1, 1},
  51     /* 134  08/06  206  86  a ring */
  52     /* 143  08/15  217  8F  A ring */ {0x86, 0x8F, 1, 1},
  53     /* 145  09/01  221  91  ae diphthong */
  54     /* 146  09/02  222  92  AE diphthong */ {0x91, 0x92, 1, 1},
  55     /* 135  08/07  207  87  c cedilla */
  56     /* 128  08/00  200  80  C cedilla */ {0x87, 0x80, 1, 1},
  57     /* 138  08/10  212  8A  e grave */
  58     /* 212  13/04  324  D4  E grave */ {0x8A, 0xD4, 1, 1},
  59     /* 130  08/02  202  82  e acute */
  60     /* 144  09/00  220  90  E acute */ {0x82, 0x90, 1, 1},
  61     /* 136  08/08  210  88  e circumflex */
  62     /* 210  13/02  322  D2  E circumflex */ {0x88, 0xD2, 1, 1},
  63     /* 137  08/09  211  89  e diaeresis */
  64     /* 211  13/03  323  D3  E diaeresis */ {0x89, 0xD3, 1, 1},
  65     /* 141  08/13  215  8D  i grave */
  66     /* 222  13/14  336  DE  I grave */ {0x8D, 0xDE, 1, 1},
  67     /* 161  10/01  241  A1  i acute */
  68     /* 214  13/06  326  D6  I acute */ {0xA1, 0xD6, 1, 1},
  69     /* 140  08/12  214  8C  i circumflex */
  70     /* 215  13/07  327  D7  I circumflex */ {0x8C, 0xD7, 1, 1},
  71     /* 139  08/11  213  8B  i diaeresis */
  72     /* 216  13/08  330  D8  I diaeresis */ {0x8B, 0xD8, 1, 1},
  73     /* 208  13/00  320  D0  Icelandic eth */
  74     /* 209  13/01  321  D1  Icelandic Eth */ {0xD0, 0xD1, 1, 1},
  75     /* 164  10/04  244  A4  n tilde */
  76     /* 165  10/05  245  A5  N tilde */ {0xA4, 0xA5, 1, 1},
  77     /* 149  09/05  225  95  o grave */
  78     /* 227  14/03  343  E3  O grave */ {0x95, 0xE3, 1, 1},
  79     /* 162  10/02  242  A2  o acute */
  80     /* 224  14/00  340  E0  O acute */ {0xA2, 0xE0, 1, 1},
  81     /* 147  09/03  223  93  o circumflex */
  82     /* 226  14/02  342  E2  O circumflex */ {0x93, 0xE2, 1, 1},
  83     /* 228  14/04  344  E4  o tilde */
  84     /* 229  14/05  345  E5  O tilde */ {0xE4, 0xE5, 1, 1},
  85     /* 148  09/04  224  94  o diaeresis */
  86     /* 153  09/09  231  99  O diaeresis */ {0x94, 0x99, 1, 1},
  87     /* 155  09/11  233  9B  o slash */
  88     /* 157  09/13  235  9D  O slash */ {0x9B, 0x9D, 1, 1},
  89     /* 151  09/07  227  97  u grave */
  90     /* 235  14/11  353  EB  U grave */ {0x97, 0xEB, 1, 1},
  91     /* 163  10/03  243  A3  u acute */
  92     /* 233  14/09  351  E9  U acute */ {0xA3, 0xE9, 1, 1},
  93     /* 150  09/06  226  96  u circumflex */
  94     /* 234  14/10  352  EA  U circumflex */ {0x96, 0xEA, 1, 1},
  95     /* 129  08/01  201  81  u diaeresis */
  96     /* 154  09/10  232  9A  U diaeresis */ {0x81, 0x9A, 1, 1},
  97     /* 236  14/12  354  EC  y acute */
  98     /* 237  14/13  355  ED  Y acute */ {0xEC, 0xED, 1, 1},
  99     /* 231  14/07  347  E7  Icelandic thorn */
 100     /* 232  14/08  350  E8  Icelandic Thorn */ {0xE7, 0xE8, 1, 1},
 101 
 102     {0x9C, 0, 0, 0},            /* Pound: registered as a DOS char only, no upper-case partner */
 103     {0, 0, 0, 0}                /* terminator: first two bytes both zero */
 104 };
 105 #else /* KANJI */
 106 /* lower->upper mapping for IBM Code Page 932 - MS-DOS Japanese SJIS.
      * The table holds only the all-zero terminator row, so iterating over it
      * registers no characters. */
 107 unsigned char const cp_932[][4] = {
 108     {0, 0, 0, 0}
 109 };
 110 #endif /* KANJI */
 111 
 112 char xx_dos_char_map[256];      /* entry set to 1 by add_dos_char() for each registered byte */
 113 char xx_upper_char_map[256];    /* per-byte upper-case translation (see add_dos_char) */
 114 char xx_lower_char_map[256];    /* per-byte lower-case translation (see add_dos_char) */
 115 
     /* The functions in this file access the three tables only through these
        pointers, which initially refer to the arrays above. */
 116 char *dos_char_map = xx_dos_char_map;
 117 char *upper_char_map = xx_upper_char_map;
 118 char *lower_char_map = xx_lower_char_map;
 119 
 120 /*
 121  * This code has been extended to deal with ascynchronous mappings
 122  * like MS-DOS Latin US (Code page 437) where things like :
 123  * a acute are capitalized to 'A', but the reverse mapping
 124  * must not hold true. This allows the filename case insensitive
 125  * matching in do_match() to work, as the DOS/Win95/NT client 
 126  * uses 'A' as a mask to match against characters like a acute.
 127  * This is the meaning behind the parameters that allow a
 128  * mapping from lower to upper, but not upper to lower.
 129  */
 130 
 131 static void
 132 add_dos_char (int lower, BOOL map_lower_to_upper, int upper, BOOL map_upper_to_lower)
     /* [previous][next][first][last][top][bottom][index][help]  */
 133 {
 134     lower &= 0xff;
 135     upper &= 0xff;
 136     DEBUGADD (6, ("Adding chars 0x%x 0x%x (l->u = %s) (u->l = %s)\n",
 137                   lower, upper,
 138                   map_lower_to_upper ? "True" : "False", map_upper_to_lower ? "True" : "False"));
 139     if (lower)
 140         dos_char_map[lower] = 1;
 141     if (upper)
 142         dos_char_map[upper] = 1;
 143     lower_char_map[lower] = (char) lower;       /* Define tolower(lower) */
 144     upper_char_map[upper] = (char) upper;       /* Define toupper(upper) */
 145     if (lower && upper)
 146     {
 147         if (map_upper_to_lower)
 148             lower_char_map[upper] = (char) lower;
 149         if (map_lower_to_upper)
 150             upper_char_map[lower] = (char) upper;
 151     }
 152 }
 153 
 154 /****************************************************************************
 155 initialise the charset arrays
 156 ****************************************************************************/
 157 void
 158 charset_initialise (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 159 {
 160     int i;
 161 
 162 #ifdef LC_ALL
 163     /* include <locale.h> in includes.h if available for OS                  */
 164     /* we take only standard 7-bit ASCII definitions from ctype              */
 165     setlocale (LC_ALL, "C");
 166 #endif
 167 
 168     for (i = 0; i <= 255; i++)
 169     {
 170         dos_char_map[i] = 0;
 171     }
 172 
 173     for (i = 0; i <= 127; i++)
 174     {
 175         if (isalnum (i) || strchr ("._^$~!#%&-{}()@'`", (char) i))
 176             add_dos_char (i, False, 0, False);
 177     }
 178 
 179     for (i = 0; i <= 255; i++)
 180     {
 181         char c = (char) i;
 182         upper_char_map[i] = lower_char_map[i] = c;
 183 
 184         /* Some systems have buggy isupper/islower for characters
 185            above 127. Best not to rely on them. */
 186         if (i < 128)
 187         {
 188             if (isupper ((int) c))
 189                 lower_char_map[i] = tolower (c);
 190             if (islower ((int) c))
 191                 upper_char_map[i] = toupper (c);
 192         }
 193     }
 194 }
 195 
 196 /****************************************************************************
 197 load the client codepage.
 198 ****************************************************************************/
 199 
 200 typedef const unsigned char (*codepage_p)[4];
 201 
 202 static codepage_p
 203 load_client_codepage (int client_codepage)
     /* [previous][next][first][last][top][bottom][index][help]  */
 204 {
 205     pstring codepage_file_name;
 206     unsigned char buf[8];
 207     FILE *fp = NULL;
 208     SMB_OFF_T size;
 209     codepage_p cp_p = NULL;
 210     SMB_STRUCT_STAT st;
 211 
 212     DEBUG (5, ("load_client_codepage: loading codepage %d.\n", client_codepage));
 213 
 214     if (strlen (CODEPAGEDIR) + 14 > sizeof (codepage_file_name))
 215     {
 216         DEBUG (0, ("load_client_codepage: filename too long to load\n"));
 217         return NULL;
 218     }
 219 
 220     pstrcpy (codepage_file_name, CODEPAGEDIR);
 221     pstrcat (codepage_file_name, "/");
 222     pstrcat (codepage_file_name, "codepage.");
 223     slprintf (&codepage_file_name[strlen (codepage_file_name)],
 224               sizeof (pstring) - (strlen (codepage_file_name) + 1), "%03d", client_codepage);
 225 
 226     if (sys_stat (codepage_file_name, &st) != 0)
 227     {
 228         DEBUG (0, ("load_client_codepage: filename %s does not exist.\n", codepage_file_name));
 229         return NULL;
 230     }
 231 
 232     /* Check if it is at least big enough to hold the required
 233        data. Should be 2 byte version, 2 byte codepage, 4 byte length, 
 234        plus zero or more bytes of data. Note that the data cannot be more
 235        than 4 * MAXCODEPAGELINES bytes.
 236      */
 237     size = st.st_size;
 238 
 239     if (size < CODEPAGE_HEADER_SIZE || size > (CODEPAGE_HEADER_SIZE + 4 * MAXCODEPAGELINES))
 240     {
 241         DEBUG (0, ("load_client_codepage: file %s is an incorrect size for a \
 242 code page file (size=%d).\n", codepage_file_name, (int) size));
 243         return NULL;
 244     }
 245 
 246     /* Read the first 8 bytes of the codepage file - check
 247        the version number and code page number. All the data
 248        is held in little endian format.
 249      */
 250 
 251     if ((fp = sys_fopen (codepage_file_name, "r")) == NULL)
 252     {
 253         DEBUG (0, ("load_client_codepage: cannot open file %s. Error was %s\n",
 254                    codepage_file_name, unix_error_string (errno)));
 255         return NULL;
 256     }
 257 
 258     if (fread (buf, 1, CODEPAGE_HEADER_SIZE, fp) != CODEPAGE_HEADER_SIZE)
 259     {
 260         DEBUG (0, ("load_client_codepage: cannot read header from file %s. Error was %s\n",
 261                    codepage_file_name, unix_error_string (errno)));
 262         goto clean_and_exit;
 263     }
 264 
 265     /* Check the version value */
 266     if (SVAL (buf, CODEPAGE_VERSION_OFFSET) != CODEPAGE_FILE_VERSION_ID)
 267     {
 268         DEBUG (0, ("load_client_codepage: filename %s has incorrect version id. \
 269 Needed %hu, got %hu.\n", codepage_file_name, (uint16) CODEPAGE_FILE_VERSION_ID, SVAL (buf, CODEPAGE_VERSION_OFFSET)));
 270         goto clean_and_exit;
 271     }
 272 
 273     /* Check the codepage matches */
 274     if (SVAL (buf, CODEPAGE_CLIENT_CODEPAGE_OFFSET) != (uint16) client_codepage)
 275     {
 276         DEBUG (0, ("load_client_codepage: filename %s has incorrect codepage. \
 277 Needed %hu, got %hu.\n", codepage_file_name, (uint16) client_codepage, SVAL (buf, CODEPAGE_CLIENT_CODEPAGE_OFFSET)));
 278         goto clean_and_exit;
 279     }
 280 
 281     /* Check the length is correct. */
 282     if (IVAL (buf, CODEPAGE_LENGTH_OFFSET) != (size - CODEPAGE_HEADER_SIZE))
 283     {
 284         DEBUG (0, ("load_client_codepage: filename %s has incorrect size headers. \
 285 Needed %u, got %u.\n", codepage_file_name, (uint32) (size - CODEPAGE_HEADER_SIZE), IVAL (buf, CODEPAGE_LENGTH_OFFSET)));
 286         goto clean_and_exit;
 287     }
 288 
 289     size -= CODEPAGE_HEADER_SIZE;       /* Remove header */
 290 
 291     /* Make sure the size is a multiple of 4. */
 292     if ((size % 4) != 0)
 293     {
 294         DEBUG (0, ("load_client_codepage: filename %s has a codepage size not a \
 295 multiple of 4.\n", codepage_file_name));
 296         goto clean_and_exit;
 297     }
 298 
 299     /* Allocate space for the code page file and read it all in. */
 300     if ((cp_p = (codepage_p) malloc (size + 4)) == NULL)
 301     {
 302         DEBUG (0, ("load_client_codepage: malloc fail.\n"));
 303         goto clean_and_exit;
 304     }
 305 
 306     if (fread ((char *) cp_p, 1, size, fp) != size)
 307     {
 308         DEBUG (0, ("load_client_codepage: read fail on file %s. Error was %s.\n",
 309                    codepage_file_name, unix_error_string (errno)));
 310         goto clean_and_exit;
 311     }
 312 
 313     /* Ensure array is correctly terminated. */
 314     memset (((char *) cp_p) + size, '\0', 4);
 315 
 316     fclose (fp);
 317     return cp_p;
 318 
 319   clean_and_exit:
 320 
 321     /* pseudo destructor :-) */
 322 
 323     if (fp != NULL)
 324         fclose (fp);
 325     if (cp_p)
 326         free ((char *) cp_p);
 327     return NULL;
 328 }
 329 
 330 /****************************************************************************
 331 initialise the client codepage.
 332 ****************************************************************************/
 333 void
 334 codepage_initialise (int client_codepage)
     /* [previous][next][first][last][top][bottom][index][help]  */
 335 {
 336     int i;
 337     static codepage_p cp = NULL;
 338 
 339     if (cp != NULL)
 340     {
 341         DEBUG (6,
 342                ("codepage_initialise: called twice - ignoring second client code page = %d\n",
 343                 client_codepage));
 344         return;
 345     }
 346 
 347     DEBUG (6, ("codepage_initialise: client code page = %d\n", client_codepage));
 348 
 349     /*
 350      * Known client codepages - these can be added to.
 351      */
 352     cp = load_client_codepage (client_codepage);
 353 
 354     if (cp == NULL)
 355     {
 356 #ifdef KANJI
 357         DEBUG (6, ("codepage_initialise: loading dynamic codepage file %s/codepage.%d \
 358 for code page %d failed. Using default client codepage 932\n", CODEPAGEDIR, client_codepage, client_codepage));
 359         cp = cp_932;
 360         client_codepage = KANJI_CODEPAGE;
 361 #else /* KANJI */
 362         DEBUG (6, ("codepage_initialise: loading dynamic codepage file %s/codepage.%d \
 363 for code page %d failed. Using default client codepage 850\n", CODEPAGEDIR, client_codepage, client_codepage));
 364         cp = cp_850;
 365         client_codepage = MSDOS_LATIN_1_CODEPAGE;
 366 #endif /* KANJI */
 367     }
 368 
 369     /*
 370      * Setup the function pointers for the loaded codepage.
 371      */
 372     initialize_multibyte_vectors (client_codepage);
 373 
 374     if (cp)
 375     {
 376         for (i = 0; !((cp[i][0] == '\0') && (cp[i][1] == '\0')); i++)
 377             add_dos_char (cp[i][0], (BOOL) cp[i][2], cp[i][1], (BOOL) cp[i][3]);
 378     }
 379 }
 380 
 381 /*******************************************************************
 382 add characters depending on a string passed by the user
 383 ********************************************************************/
 384 void
 385 add_char_string (const char *s)
     /* [previous][next][first][last][top][bottom][index][help]  */
 386 {
 387     char *extra_chars = (char *) strdup (s);
 388     char *t;
 389     if (!extra_chars)
 390         return;
 391 
 392     for (t = strtok (extra_chars, " \t\r\n"); t; t = strtok (NULL, " \t\r\n"))
 393     {
 394         char c1 = 0, c2 = 0;
 395         int i1 = 0, i2 = 0;
 396         if (isdigit ((unsigned char) *t) || (*t) == '-')
 397         {
 398             sscanf (t, "%i:%i", &i1, &i2);
 399             add_dos_char (i1, True, i2, True);
 400         }
 401         else
 402         {
 403             sscanf (t, "%c:%c", &c1, &c2);
 404             add_dos_char ((unsigned char) c1, True, (unsigned char) c2, True);
 405         }
 406     }
 407 
 408     free (extra_chars);
 409 }

/* [previous][next][first][last][top][bottom][index][help]  */