root/lib/charsets.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. new_codepage_desc
  2. free_codepage_desc
  3. load_codepages_list_from_file
  4. translate_character
  5. load_codepages_list
  6. free_codepages_list
  7. get_codepage_id
  8. get_codepage_index
  9. is_supported_encoding
  10. init_translation_table
  11. convert_to_display
  12. str_convert_to_display
  13. str_nconvert_to_display
  14. convert_from_input
  15. str_convert_to_input
  16. str_nconvert_to_input
  17. convert_from_utf_to_current
  18. convert_from_utf_to_current_c
  19. convert_from_8bit_to_utf_c
  20. convert_from_8bit_to_utf_c2

   1 /*
   2    Text conversion from one charset to another.
   3 
   4    Copyright (C) 2001-2019
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Walery Studennikov <despair@sama.ru>
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25 
  26 /** \file charsets.c
  27  *  \brief Source: Text conversion from one charset to another
  28  */
  29 
  30 #include <config.h>
  31 
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 
  36 #include "lib/global.h"
  37 #include "lib/strutil.h"        /* utf-8 functions */
  38 #include "lib/fileloc.h"
  39 #include "lib/util.h"           /* whitespace() */
  40 
  41 #include "lib/charsets.h"
  42 
  43 /*** global variables ****************************************************************************/
  44 
  45 GPtrArray *codepages = NULL;
  46 
  47 unsigned char conv_displ[256];
  48 unsigned char conv_input[256];
  49 
  50 const char *cp_display = NULL;
  51 const char *cp_source = NULL;
  52 
  53 /*** file scope macro definitions ****************************************************************/
  54 
  55 #define UNKNCHAR '\001'
  56 
  57 #define OTHER_8BIT "Other_8_bit"
  58 
  59 /*** file scope type declarations ****************************************************************/
  60 
  61 /*** file scope variables ************************************************************************/
  62 
  63 /*** file scope functions ************************************************************************/
  64 /* --------------------------------------------------------------------------------------------- */
  65 
  66 static codepage_desc *
  67 new_codepage_desc (const char *id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
  68 {
  69     codepage_desc *desc;
  70 
  71     desc = g_new (codepage_desc, 1);
  72     desc->id = g_strdup (id);
  73     desc->name = g_strdup (name);
  74 
  75     return desc;
  76 }
  77 
  78 /* --------------------------------------------------------------------------------------------- */
  79 
  80 static void
  81 free_codepage_desc (gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help]  */
  82 {
  83     codepage_desc *desc = (codepage_desc *) data;
  84     (void) user_data;
  85 
  86     g_free (desc->id);
  87     g_free (desc->name);
  88     g_free (desc);
  89 }
  90 
  91 /* --------------------------------------------------------------------------------------------- */
  92 /* returns display codepage */
  93 
  94 static void
  95 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
     /* [previous][next][first][last][top][bottom][index][help]  */
  96 {
  97     FILE *f;
  98     char buf[BUF_MEDIUM];
  99     char *default_codepage = NULL;
 100 
 101     f = fopen (fname, "r");
 102     if (f == NULL)
 103         return;
 104 
 105     while (fgets (buf, sizeof buf, f) != NULL)
 106     {
 107         /* split string into id and cpname */
 108         char *p = buf;
 109         size_t buflen;
 110 
 111         if (*p == '\n' || *p == '\0' || *p == '#')
 112             continue;
 113 
 114         buflen = strlen (buf);
 115 
 116         if (buflen != 0 && buf[buflen - 1] == '\n')
 117             buf[buflen - 1] = '\0';
 118         while (*p != '\0' && !whitespace (*p))
 119             ++p;
 120         if (*p == '\0')
 121             goto fail;
 122 
 123         *p++ = '\0';
 124         g_strstrip (p);
 125         if (*p == '\0')
 126             goto fail;
 127 
 128         if (strcmp (buf, "default") == 0)
 129             default_codepage = g_strdup (p);
 130         else
 131         {
 132             const char *id = buf;
 133 
 134             if (*list == NULL)
 135             {
 136                 *list = g_ptr_array_sized_new (16);
 137                 g_ptr_array_add (*list, new_codepage_desc (id, p));
 138             }
 139             else
 140             {
 141                 unsigned int i;
 142 
 143                 /* whether id is already present in list */
 144                 /* if yes, overwrite description */
 145                 for (i = 0; i < (*list)->len; i++)
 146                 {
 147                     codepage_desc *desc;
 148 
 149                     desc = (codepage_desc *) g_ptr_array_index (*list, i);
 150 
 151                     if (strcmp (id, desc->id) == 0)
 152                     {
 153                         /* found */
 154                         g_free (desc->name);
 155                         desc->name = g_strdup (p);
 156                         break;
 157                     }
 158                 }
 159 
 160                 /* not found */
 161                 if (i == (*list)->len)
 162                     g_ptr_array_add (*list, new_codepage_desc (id, p));
 163             }
 164         }
 165     }
 166 
 167     if (default_codepage != NULL)
 168     {
 169         mc_global.display_codepage = get_codepage_index (default_codepage);
 170         g_free (default_codepage);
 171     }
 172 
 173   fail:
 174     fclose (f);
 175 }
 176 
 177 /* --------------------------------------------------------------------------------------------- */
 178 
 179 static char
 180 translate_character (GIConv cd, char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 181 {
 182     gchar *tmp_buff = NULL;
 183     gsize bytes_read, bytes_written = 0;
 184     const char *ibuf = &c;
 185     char ch = UNKNCHAR;
 186     int ibuflen = 1;
 187 
 188     tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
 189     if (tmp_buff != NULL)
 190         ch = tmp_buff[0];
 191     g_free (tmp_buff);
 192     return ch;
 193 }
 194 
 195 /* --------------------------------------------------------------------------------------------- */
 196 /*** public functions ****************************************************************************/
 197 /* --------------------------------------------------------------------------------------------- */
 198 
 199 void
 200 load_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 201 {
 202     char *fname;
 203 
 204     /* 1: try load /usr/share/mc/mc.charsets */
 205     fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
 206     load_codepages_list_from_file (&codepages, fname);
 207     g_free (fname);
 208 
 209     /* 2: try load /etc/mc/mc.charsets */
 210     fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
 211     load_codepages_list_from_file (&codepages, fname);
 212     g_free (fname);
 213 
 214     if (codepages == NULL)
 215     {
 216         /* files are not found, add defaullt codepage */
 217         fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
 218 
 219         codepages = g_ptr_array_new ();
 220         g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
 221     }
 222 }
 223 
 224 /* --------------------------------------------------------------------------------------------- */
 225 
 226 void
 227 free_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 228 {
 229     g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
 230     g_ptr_array_free (codepages, TRUE);
 231     /* NULL-ize pointer to make unit tests happy */
 232     codepages = NULL;
 233 }
 234 
 235 /* --------------------------------------------------------------------------------------------- */
 236 
 237 const char *
 238 get_codepage_id (const int n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 239 {
 240     return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
 241 }
 242 
 243 /* --------------------------------------------------------------------------------------------- */
 244 
 245 int
 246 get_codepage_index (const char *id)
     /* [previous][next][first][last][top][bottom][index][help]  */
 247 {
 248     size_t i;
 249 
 250     if (codepages == NULL)
 251         return -1;
 252     if (strcmp (id, OTHER_8BIT) == 0)
 253         return -1;
 254     for (i = 0; i < codepages->len; i++)
 255         if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
 256             return i;
 257     return -1;
 258 }
 259 
 260 /* --------------------------------------------------------------------------------------------- */
 261 /** Check if specified encoding can be used in mc.
 262  * @param encoding name of encoding
 263  * @return TRUE if encoding is supported by mc, FALSE otherwise
 264  */
 265 
 266 gboolean
 267 is_supported_encoding (const char *encoding)
     /* [previous][next][first][last][top][bottom][index][help]  */
 268 {
 269     gboolean result = FALSE;
 270     guint t;
 271 
 272     for (t = 0; t < codepages->len; t++)
 273     {
 274         const char *id;
 275 
 276         id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
 277         result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
 278     }
 279 
 280     return result;
 281 }
 282 
 283 /* --------------------------------------------------------------------------------------------- */
 284 
 285 char *
 286 init_translation_table (int cpsource, int cpdisplay)
     /* [previous][next][first][last][top][bottom][index][help]  */
 287 {
 288     int i;
 289     GIConv cd;
 290 
 291     /* Fill inpit <-> display tables */
 292 
 293     if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
 294     {
 295         for (i = 0; i <= 255; ++i)
 296         {
 297             conv_displ[i] = i;
 298             conv_input[i] = i;
 299         }
 300         cp_source = cp_display;
 301         return NULL;
 302     }
 303 
 304     for (i = 0; i <= 127; ++i)
 305     {
 306         conv_displ[i] = i;
 307         conv_input[i] = i;
 308     }
 309     cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
 310     cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
 311 
 312     /* display <- inpit table */
 313 
 314     cd = g_iconv_open (cp_display, cp_source);
 315     if (cd == INVALID_CONV)
 316         return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
 317 
 318     for (i = 128; i <= 255; ++i)
 319         conv_displ[i] = translate_character (cd, i);
 320 
 321     g_iconv_close (cd);
 322 
 323     /* inpit <- display table */
 324 
 325     cd = g_iconv_open (cp_source, cp_display);
 326     if (cd == INVALID_CONV)
 327         return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
 328 
 329     for (i = 128; i <= 255; ++i)
 330     {
 331         unsigned char ch;
 332         ch = translate_character (cd, i);
 333         conv_input[i] = (ch == UNKNCHAR) ? i : ch;
 334     }
 335 
 336     g_iconv_close (cd);
 337 
 338     return NULL;
 339 }
 340 
 341 /* --------------------------------------------------------------------------------------------- */
 342 
 343 void
 344 convert_to_display (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 345 {
 346     if (str != NULL)
 347         for (; *str != '\0'; str++)
 348             *str = conv_displ[(unsigned char) *str];
 349 }
 350 
 351 /* --------------------------------------------------------------------------------------------- */
 352 
 353 GString *
 354 str_convert_to_display (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 355 {
 356     return str_nconvert_to_display (str, -1);
 357 }
 358 
 359 /* --------------------------------------------------------------------------------------------- */
 360 
 361 GString *
 362 str_nconvert_to_display (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 363 {
 364     GString *buff;
 365     GIConv conv;
 366 
 367     if (str == NULL)
 368         return g_string_new ("");
 369 
 370     if (cp_display == cp_source)
 371         return g_string_new (str);
 372 
 373     conv = str_crt_conv_from (cp_source);
 374 
 375     buff = g_string_new ("");
 376     str_nconvert (conv, str, len, buff);
 377     str_close_conv (conv);
 378     return buff;
 379 }
 380 
 381 /* --------------------------------------------------------------------------------------------- */
 382 
 383 void
 384 convert_from_input (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 385 {
 386     if (str != NULL)
 387         for (; *str != '\0'; str++)
 388             *str = conv_input[(unsigned char) *str];
 389 }
 390 
 391 /* --------------------------------------------------------------------------------------------- */
 392 
 393 GString *
 394 str_convert_to_input (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 395 {
 396     return str_nconvert_to_input (str, -1);
 397 }
 398 
 399 /* --------------------------------------------------------------------------------------------- */
 400 
 401 GString *
 402 str_nconvert_to_input (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 403 {
 404     GString *buff;
 405     GIConv conv;
 406 
 407     if (str == NULL)
 408         return g_string_new ("");
 409 
 410     if (cp_display == cp_source)
 411         return g_string_new (str);
 412 
 413     conv = str_crt_conv_to (cp_source);
 414 
 415     buff = g_string_new ("");
 416     str_nconvert (conv, str, len, buff);
 417     str_close_conv (conv);
 418     return buff;
 419 }
 420 
 421 /* --------------------------------------------------------------------------------------------- */
 422 
 423 unsigned char
 424 convert_from_utf_to_current (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 425 {
 426     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 427     unsigned char ch = '.';
 428     GIConv conv;
 429     const char *cp_to;
 430 
 431     if (str == NULL)
 432         return '.';
 433 
 434     cp_to = get_codepage_id (mc_global.source_codepage);
 435     conv = str_crt_conv_to (cp_to);
 436 
 437     if (conv != INVALID_CONV)
 438     {
 439         switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
 440         {
 441         case ESTR_SUCCESS:
 442             ch = buf_ch[0];
 443             break;
 444         case ESTR_PROBLEM:
 445         case ESTR_FAILURE:
 446             ch = '.';
 447             break;
 448         default:
 449             break;
 450         }
 451         str_close_conv (conv);
 452     }
 453 
 454     return ch;
 455 }
 456 
 457 /* --------------------------------------------------------------------------------------------- */
 458 
 459 unsigned char
 460 convert_from_utf_to_current_c (int input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 461 {
 462     unsigned char str[UTF8_CHAR_LEN + 1];
 463     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 464     unsigned char ch = '.';
 465     int res;
 466 
 467     res = g_unichar_to_utf8 (input_char, (char *) str);
 468     if (res == 0)
 469         return ch;
 470 
 471     str[res] = '\0';
 472 
 473     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 474     {
 475     case ESTR_SUCCESS:
 476         ch = buf_ch[0];
 477         break;
 478     case ESTR_PROBLEM:
 479     case ESTR_FAILURE:
 480         ch = '.';
 481         break;
 482     default:
 483         break;
 484     }
 485 
 486     return ch;
 487 }
 488 
 489 /* --------------------------------------------------------------------------------------------- */
 490 
 491 int
 492 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 493 {
 494     unsigned char str[2];
 495     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 496     int ch;
 497 
 498     str[0] = (unsigned char) input_char;
 499     str[1] = '\0';
 500 
 501     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 502     {
 503     case ESTR_SUCCESS:
 504         {
 505             int res;
 506 
 507             res = g_utf8_get_char_validated ((char *) buf_ch, -1);
 508             ch = res >= 0 ? res : buf_ch[0];
 509             break;
 510         }
 511     case ESTR_PROBLEM:
 512     case ESTR_FAILURE:
 513     default:
 514         ch = '.';
 515         break;
 516     }
 517 
 518     return ch;
 519 }
 520 
 521 /* --------------------------------------------------------------------------------------------- */
 522 
 523 int
 524 convert_from_8bit_to_utf_c2 (char input_char)
     /* [previous][next][first][last][top][bottom][index][help]  */
 525 {
 526     int ch = '.';
 527     GIConv conv;
 528     const char *cp_from;
 529 
 530     cp_from = get_codepage_id (mc_global.source_codepage);
 531 
 532     conv = str_crt_conv_to (cp_from);
 533     if (conv != INVALID_CONV)
 534     {
 535         ch = convert_from_8bit_to_utf_c (input_char, conv);
 536         str_close_conv (conv);
 537     }
 538 
 539     return ch;
 540 }
 541 
 542 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */