Manual pages: mc, mcdiff, mcedit, mcview

root/lib/charsets.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. new_codepage_desc
  2. free_codepage_desc
  3. load_codepages_list_from_file
  4. translate_character
  5. load_codepages_list
  6. free_codepages_list
  7. get_codepage_id
  8. get_codepage_name
  9. get_codepage_index
  10. is_supported_encoding
  11. init_translation_table
  12. str_nconvert_to_display
  13. str_nconvert_to_input
  14. convert_from_utf_to_current
  15. convert_from_utf_to_current_c
  16. convert_from_8bit_to_utf_c
  17. convert_from_8bit_to_utf_c2

   1 /*
   2    Text conversion from one charset to another.
   3 
   4    Copyright (C) 2001-2025
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Walery Studennikov <despair@sama.ru>
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25 
  26 /** \file charsets.c
  27  *  \brief Source: Text conversion from one charset to another
  28  */
  29 
  30 #include <config.h>
  31 
  32 #include <limits.h>  // MB_LEN_MAX
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 
  37 #include "lib/global.h"
  38 #include "lib/strutil.h"  // utf-8 functions
  39 #include "lib/fileloc.h"
  40 #include "lib/util.h"  // whitespace()
  41 
  42 #include "lib/charsets.h"
  43 
  44 /*** global variables ****************************************************************************/
  45 
  46 GPtrArray *codepages = NULL;
  47 
  48 unsigned char conv_displ[256];
  49 unsigned char conv_input[256];
  50 
  51 const char *cp_display = NULL;
  52 const char *cp_source = NULL;
  53 
  54 /*** file scope macro definitions ****************************************************************/
  55 
  56 #define UNKNCHAR '\001'
  57 
  58 /*** file scope type declarations ****************************************************************/
  59 
  60 typedef struct
  61 {
  62     char *id;
  63     char *name;
  64 } codepage_desc;
  65 
  66 /*** forward declarations (file scope functions) *************************************************/
  67 
  68 /*** file scope variables ************************************************************************/
  69 
  70 static const char NO_TRANSLATION[] = N_ ("No translation");
  71 
  72 /* --------------------------------------------------------------------------------------------- */
  73 /*** file scope functions ************************************************************************/
  74 /* --------------------------------------------------------------------------------------------- */
  75 
  76 static codepage_desc *
  77 new_codepage_desc (const char *id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
  78 {
  79     codepage_desc *desc;
  80 
  81     desc = g_new (codepage_desc, 1);
  82     desc->id = g_strdup (id);
  83     desc->name = g_strdup (name);
  84 
  85     return desc;
  86 }
  87 
  88 /* --------------------------------------------------------------------------------------------- */
  89 
  90 static void
  91 free_codepage_desc (gpointer data)
     /* [previous][next][first][last][top][bottom][index][help]  */
  92 {
  93     codepage_desc *desc = (codepage_desc *) data;
  94 
  95     g_free (desc->id);
  96     g_free (desc->name);
  97     g_free (desc);
  98 }
  99 
 100 /* --------------------------------------------------------------------------------------------- */
 101 /* returns display codepage */
 102 
 103 static void
 104 load_codepages_list_from_file (GPtrArray **list, const char *fname)
     /* [previous][next][first][last][top][bottom][index][help]  */
 105 {
 106     FILE *f;
 107     char buf[BUF_MEDIUM];
 108     char *default_codepage = NULL;
 109 
 110     f = fopen (fname, "r");
 111     if (f == NULL)
 112         return;
 113 
 114     while (fgets (buf, sizeof buf, f) != NULL)
 115     {
 116         // split string into id and cpname
 117         char *p = buf;
 118         size_t buflen;
 119 
 120         if (*p == '\n' || *p == '\0' || *p == '#')
 121             continue;
 122 
 123         buflen = strlen (buf);
 124 
 125         if (buflen != 0 && buf[buflen - 1] == '\n')
 126             buf[buflen - 1] = '\0';
 127         while (*p != '\0' && !whitespace (*p))
 128             ++p;
 129         if (*p == '\0')
 130             goto fail;
 131 
 132         *p++ = '\0';
 133         g_strstrip (p);
 134         if (*p == '\0')
 135             goto fail;
 136 
 137         if (strcmp (buf, "default") == 0)
 138             default_codepage = g_strdup (p);
 139         else
 140         {
 141             const char *id = buf;
 142 
 143             if (*list == NULL)
 144             {
 145                 *list = g_ptr_array_new_full (16, free_codepage_desc);
 146                 g_ptr_array_add (*list, new_codepage_desc (id, p));
 147             }
 148             else
 149             {
 150                 unsigned int i;
 151 
 152                 // whether id is already present in list
 153                 // if yes, overwrite description
 154                 for (i = 0; i < (*list)->len; i++)
 155                 {
 156                     codepage_desc *desc;
 157 
 158                     desc = (codepage_desc *) g_ptr_array_index (*list, i);
 159 
 160                     if (strcmp (id, desc->id) == 0)
 161                     {
 162                         // found
 163                         g_free (desc->name);
 164                         desc->name = g_strdup (p);
 165                         break;
 166                     }
 167                 }
 168 
 169                 // not found
 170                 if (i == (*list)->len)
 171                     g_ptr_array_add (*list, new_codepage_desc (id, p));
 172             }
 173         }
 174     }
 175 
 176     if (default_codepage != NULL)
 177     {
 178         mc_global.display_codepage = get_codepage_index (default_codepage);
 179         g_free (default_codepage);
 180     }
 181 
 182 fail:
 183     fclose (f);
 184 }
 185 
 186 /* --------------------------------------------------------------------------------------------- */
 187 
 188 static char
 189 translate_character (GIConv cd, char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 190 {
 191     gchar *tmp_buff = NULL;
 192     gsize bytes_read, bytes_written = 0;
 193     const char *ibuf = &c;
 194     char ch = UNKNCHAR;
 195     int ibuflen = 1;
 196 
 197     tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
 198     if (tmp_buff != NULL)
 199         ch = tmp_buff[0];
 200     g_free (tmp_buff);
 201     return ch;
 202 }
 203 
 204 /* --------------------------------------------------------------------------------------------- */
 205 /*** public functions ****************************************************************************/
 206 /* --------------------------------------------------------------------------------------------- */
 207 
 208 void
 209 load_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 210 {
 211     char *fname;
 212 
 213     // 1: try load /usr/share/mc/mc.charsets
 214     fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
 215     load_codepages_list_from_file (&codepages, fname);
 216     g_free (fname);
 217 
 218     // 2: try load /etc/mc/mc.charsets
 219     fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
 220     load_codepages_list_from_file (&codepages, fname);
 221     g_free (fname);
 222 
 223     if (codepages == NULL)
 224     {
 225         // files are not found, add default codepage
 226         fprintf (stderr, "%s\n", _ ("Warning: cannot load codepages list"));
 227 
 228         codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
 229         g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _ ("7-bit ASCII")));
 230     }
 231 }
 232 
 233 /* --------------------------------------------------------------------------------------------- */
 234 
 235 void
 236 free_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 237 {
 238     g_ptr_array_free (codepages, TRUE);
 239     // NULL-ize pointer to make unit tests happy
 240     codepages = NULL;
 241 }
 242 
 243 /* --------------------------------------------------------------------------------------------- */
 244 
 245 const char *
 246 get_codepage_id (const int n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 247 {
 248     return (n < 0) ? NO_TRANSLATION : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
 249 }
 250 
 251 /* --------------------------------------------------------------------------------------------- */
 252 
 253 const char *
 254 get_codepage_name (const int n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 255 {
 256     return (n < 0) ? _ (NO_TRANSLATION)
 257                    : ((codepage_desc *) g_ptr_array_index (codepages, n))->name;
 258 }
 259 
 260 /* --------------------------------------------------------------------------------------------- */
 261 
 262 int
 263 get_codepage_index (const char *id)
     /* [previous][next][first][last][top][bottom][index][help]  */
 264 {
 265     if (codepages == NULL)
 266         return -1;
 267     if (strcmp (id, NO_TRANSLATION) == 0)
 268         return -1;
 269     for (guint i = 0; i < codepages->len; i++)
 270         if (strcmp (id, get_codepage_id (i)) == 0)
 271             return (int) i;
 272     return -1;
 273 }
 274 
 275 /* --------------------------------------------------------------------------------------------- */
 276 /** Check if specified encoding can be used in mc.
 277  * @param encoding name of encoding
 278  * @return TRUE if encoding is supported by mc, FALSE otherwise
 279  */
 280 
 281 gboolean
 282 is_supported_encoding (const char *encoding)
     /* [previous][next][first][last][top][bottom][index][help]  */
 283 {
 284     GIConv coder;
 285     gboolean result;
 286 
 287     if (encoding == NULL)
 288         return FALSE;
 289 
 290     coder = str_crt_conv_from (encoding);
 291     result = coder != INVALID_CONV;
 292     if (result)
 293         str_close_conv (coder);
 294     return result;
 295 }
 296 
 297 /* --------------------------------------------------------------------------------------------- */
 298 
 299 char *
 300 init_translation_table (int cpsource, int cpdisplay)
     /* [previous][next][first][last][top][bottom][index][help]  */
 301 {
 302     int i;
 303     GIConv cd;
 304 
 305     // Fill input <-> display tables
 306 
 307     if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
 308     {
 309         for (i = 0; i <= 255; ++i)
 310         {
 311             conv_displ[i] = i;
 312             conv_input[i] = i;
 313         }
 314         cp_source = cp_display;
 315         return NULL;
 316     }
 317 
 318     for (i = 0; i <= 127; ++i)
 319     {
 320         conv_displ[i] = i;
 321         conv_input[i] = i;
 322     }
 323 
 324     cp_source = get_codepage_id (cpsource);
 325     cp_display = get_codepage_id (cpdisplay);
 326 
 327     // display <- input table
 328 
 329     cd = g_iconv_open (cp_display, cp_source);
 330     if (cd == INVALID_CONV)
 331         return g_strdup_printf (_ ("Cannot translate from %s to %s"), cp_source, cp_display);
 332 
 333     for (i = 128; i <= 255; ++i)
 334         conv_displ[i] = translate_character (cd, i);
 335 
 336     g_iconv_close (cd);
 337 
 338     // input <- display table
 339 
 340     cd = g_iconv_open (cp_source, cp_display);
 341     if (cd == INVALID_CONV)
 342         return g_strdup_printf (_ ("Cannot translate from %s to %s"), cp_display, cp_source);
 343 
 344     for (i = 128; i <= 255; ++i)
 345     {
 346         unsigned char ch;
 347         ch = translate_character (cd, i);
 348         conv_input[i] = (ch == UNKNCHAR) ? i : ch;
 349     }
 350 
 351     g_iconv_close (cd);
 352 
 353     return NULL;
 354 }
 355 
 356 /* --------------------------------------------------------------------------------------------- */
 357 
 358 GString *
 359 str_nconvert_to_display (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 360 {
 361     GString *buff;
 362     GIConv conv;
 363 
 364     if (str == NULL)
 365         return NULL;
 366 
 367     if (cp_display == cp_source)
 368         return g_string_new (str);
 369 
 370     conv = str_crt_conv_from (cp_source);
 371     if (conv == INVALID_CONV)
 372         return g_string_new (str);
 373 
 374     buff = g_string_new ("");
 375     str_nconvert (conv, str, len, buff);
 376     str_close_conv (conv);
 377     return buff;
 378 }
 379 
 380 /* --------------------------------------------------------------------------------------------- */
 381 
 382 GString *
 383 str_nconvert_to_input (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 384 {
 385     GString *buff;
 386     GIConv conv;
 387 
 388     if (str == NULL)
 389         return NULL;
 390 
 391     if (cp_display == cp_source)
 392         return g_string_new (str);
 393 
 394     conv = str_crt_conv_to (cp_source);
 395     if (conv == INVALID_CONV)
 396         return g_string_new (str);
 397 
 398     buff = g_string_new ("");
 399     str_nconvert (conv, str, len, buff);
 400     str_close_conv (conv);
 401     return buff;
 402 }
 403 
 404 /* --------------------------------------------------------------------------------------------- */
 405 
 406 unsigned char
 407 convert_from_utf_to_current (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 408 {
 409     unsigned char buf_ch[MB_LEN_MAX + 1];
 410     unsigned char ch = '.';
 411     GIConv conv;
 412     const char *cp_to;
 413 
 414     if (str == NULL)
 415         return '.';
 416 
 417     cp_to = get_codepage_id (mc_global.source_codepage);
 418     conv = str_crt_conv_to (cp_to);
 419 
 420     if (conv != INVALID_CONV)
 421     {
 422         switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
 423         {
 424         case ESTR_SUCCESS:
 425             ch = buf_ch[0];
 426             break;
 427         case ESTR_PROBLEM:
 428         case ESTR_FAILURE:
 429             ch = '.';
 430             break;
 431         default:
 432             break;
 433         }
 434         str_close_conv (conv);
 435     }
 436 
 437     return ch;
 438 }
 439 
 440 /* --------------------------------------------------------------------------------------------- */
 441 
 442 unsigned char
 443 convert_from_utf_to_current_c (int input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 444 {
 445     unsigned char str[MB_LEN_MAX + 1];
 446     unsigned char buf_ch[MB_LEN_MAX + 1];
 447     unsigned char ch = '.';
 448     int res;
 449 
 450     res = g_unichar_to_utf8 (input_char, (char *) str);
 451     if (res == 0)
 452         return ch;
 453 
 454     str[res] = '\0';
 455 
 456     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 457     {
 458     case ESTR_SUCCESS:
 459         ch = buf_ch[0];
 460         break;
 461     case ESTR_PROBLEM:
 462     case ESTR_FAILURE:
 463         ch = '.';
 464         break;
 465     default:
 466         break;
 467     }
 468 
 469     return ch;
 470 }
 471 
 472 /* --------------------------------------------------------------------------------------------- */
 473 
 474 int
 475 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 476 {
 477     unsigned char str[2];
 478     unsigned char buf_ch[MB_LEN_MAX + 1];
 479     int ch;
 480 
 481     str[0] = (unsigned char) input_char;
 482     str[1] = '\0';
 483 
 484     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 485     {
 486     case ESTR_SUCCESS:
 487     {
 488         int res;
 489 
 490         res = g_utf8_get_char_validated ((char *) buf_ch, -1);
 491         ch = res >= 0 ? res : buf_ch[0];
 492         break;
 493     }
 494     case ESTR_PROBLEM:
 495     case ESTR_FAILURE:
 496     default:
 497         ch = '.';
 498         break;
 499     }
 500 
 501     return ch;
 502 }
 503 
 504 /* --------------------------------------------------------------------------------------------- */
 505 
 506 int
 507 convert_from_8bit_to_utf_c2 (char input_char)
     /* [previous][next][first][last][top][bottom][index][help]  */
 508 {
 509     int ch = '.';
 510     GIConv conv;
 511     const char *cp_from;
 512 
 513     cp_from = get_codepage_id (mc_global.source_codepage);
 514 
 515     conv = str_crt_conv_to (cp_from);
 516     if (conv != INVALID_CONV)
 517     {
 518         ch = convert_from_8bit_to_utf_c (input_char, conv);
 519         str_close_conv (conv);
 520     }
 521 
 522     return ch;
 523 }
 524 
 525 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */