Manual pages: mc, mcdiff, mcedit, mcview

root/lib/charsets.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. new_codepage_desc
  2. free_codepage_desc
  3. load_codepages_list_from_file
  4. translate_character
  5. load_codepages_list
  6. free_codepages_list
  7. get_codepage_id
  8. get_codepage_index
  9. is_supported_encoding
  10. init_translation_table
  11. convert_to_display
  12. str_nconvert_to_display
  13. convert_from_input
  14. str_nconvert_to_input
  15. convert_from_utf_to_current
  16. convert_from_utf_to_current_c
  17. convert_from_8bit_to_utf_c
  18. convert_from_8bit_to_utf_c2

   1 /*
   2    Text conversion from one charset to another.
   3 
   4    Copyright (C) 2001-2025
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Walery Studennikov <despair@sama.ru>
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25 
  26 /** \file charsets.c
  27  *  \brief Source: Text conversion from one charset to another
  28  */
  29 
  30 #include <config.h>
  31 
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 
  36 #include "lib/global.h"
  37 #include "lib/strutil.h"  // utf-8 functions
  38 #include "lib/fileloc.h"
  39 #include "lib/util.h"  // whitespace()
  40 
  41 #include "lib/charsets.h"
  42 
  43 /*** global variables ****************************************************************************/
  44 
  45 GPtrArray *codepages = NULL;  // codepage_desc entries; filled by load_codepages_list(), released by free_codepages_list()
  46 
  47 unsigned char conv_displ[256];  // byte table applied by convert_to_display(); built by init_translation_table()
  48 unsigned char conv_input[256];  // byte table applied by convert_from_input(); built by init_translation_table()
  49 
  50 const char *cp_display = NULL;  // id of the display codepage; init_translation_table() points it at an id stored in codepages
  51 const char *cp_source = NULL;  // id of the source codepage; init_translation_table() points it at an id stored in codepages
  52 
  53 /*** file scope macro definitions ****************************************************************/
  54 
  55 #define UNKNCHAR       '\001'  // what translate_character() returns when a byte cannot be converted
  56 #define NO_TRANSLATION "No translation"  // id given by get_codepage_id() for a negative index; get_codepage_index() maps it to -1
  57 
  58 /*** file scope type declarations ****************************************************************/
  59 
  60 /*** forward declarations (file scope functions) *************************************************/
  61 
  62 /*** file scope variables ************************************************************************/
  63 
  64 /* --------------------------------------------------------------------------------------------- */
  65 /*** file scope functions ************************************************************************/
  66 /* --------------------------------------------------------------------------------------------- */
  67 
  68 static codepage_desc *
  69 new_codepage_desc (const char *id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
  70 {
  71     codepage_desc *desc;
  72 
  73     desc = g_new (codepage_desc, 1);
  74     desc->id = g_strdup (id);
  75     desc->name = g_strdup (name);
  76 
  77     return desc;
  78 }
  79 
  80 /* --------------------------------------------------------------------------------------------- */
  81 
  82 static void
  83 free_codepage_desc (gpointer data)
     /* [previous][next][first][last][top][bottom][index][help]  */
  84 {
  85     codepage_desc *desc = (codepage_desc *) data;
  86 
  87     g_free (desc->id);
  88     g_free (desc->name);
  89     g_free (desc);
  90 }
  91 
  92 /* --------------------------------------------------------------------------------------------- */
  93 /* returns display codepage */
  94 
  95 static void
  96 load_codepages_list_from_file (GPtrArray **list, const char *fname)
     /* [previous][next][first][last][top][bottom][index][help]  */
  97 {
  98     FILE *f;
  99     char buf[BUF_MEDIUM];
 100     char *default_codepage = NULL;
 101 
 102     f = fopen (fname, "r");
 103     if (f == NULL)
 104         return;
 105 
 106     while (fgets (buf, sizeof buf, f) != NULL)
 107     {
 108         // split string into id and cpname
 109         char *p = buf;
 110         size_t buflen;
 111 
 112         if (*p == '\n' || *p == '\0' || *p == '#')
 113             continue;
 114 
 115         buflen = strlen (buf);
 116 
 117         if (buflen != 0 && buf[buflen - 1] == '\n')
 118             buf[buflen - 1] = '\0';
 119         while (*p != '\0' && !whitespace (*p))
 120             ++p;
 121         if (*p == '\0')
 122             goto fail;
 123 
 124         *p++ = '\0';
 125         g_strstrip (p);
 126         if (*p == '\0')
 127             goto fail;
 128 
 129         if (strcmp (buf, "default") == 0)
 130             default_codepage = g_strdup (p);
 131         else
 132         {
 133             const char *id = buf;
 134 
 135             if (*list == NULL)
 136             {
 137                 *list = g_ptr_array_new_full (16, free_codepage_desc);
 138                 g_ptr_array_add (*list, new_codepage_desc (id, p));
 139             }
 140             else
 141             {
 142                 unsigned int i;
 143 
 144                 // whether id is already present in list
 145                 // if yes, overwrite description
 146                 for (i = 0; i < (*list)->len; i++)
 147                 {
 148                     codepage_desc *desc;
 149 
 150                     desc = (codepage_desc *) g_ptr_array_index (*list, i);
 151 
 152                     if (strcmp (id, desc->id) == 0)
 153                     {
 154                         // found
 155                         g_free (desc->name);
 156                         desc->name = g_strdup (p);
 157                         break;
 158                     }
 159                 }
 160 
 161                 // not found
 162                 if (i == (*list)->len)
 163                     g_ptr_array_add (*list, new_codepage_desc (id, p));
 164             }
 165         }
 166     }
 167 
 168     if (default_codepage != NULL)
 169     {
 170         mc_global.display_codepage = get_codepage_index (default_codepage);
 171         g_free (default_codepage);
 172     }
 173 
 174 fail:
 175     fclose (f);
 176 }
 177 
 178 /* --------------------------------------------------------------------------------------------- */
 179 
 180 static char
 181 translate_character (GIConv cd, char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 182 {
 183     gchar *tmp_buff = NULL;
 184     gsize bytes_read, bytes_written = 0;
 185     const char *ibuf = &c;
 186     char ch = UNKNCHAR;
 187     int ibuflen = 1;
 188 
 189     tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
 190     if (tmp_buff != NULL)
 191         ch = tmp_buff[0];
 192     g_free (tmp_buff);
 193     return ch;
 194 }
 195 
 196 /* --------------------------------------------------------------------------------------------- */
 197 /*** public functions ****************************************************************************/
 198 /* --------------------------------------------------------------------------------------------- */
 199 
 200 void
 201 load_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 202 {
 203     char *fname;
 204 
 205     // 1: try load /usr/share/mc/mc.charsets
 206     fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
 207     load_codepages_list_from_file (&codepages, fname);
 208     g_free (fname);
 209 
 210     // 2: try load /etc/mc/mc.charsets
 211     fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
 212     load_codepages_list_from_file (&codepages, fname);
 213     g_free (fname);
 214 
 215     if (codepages == NULL)
 216     {
 217         // files are not found, add default codepage
 218         fprintf (stderr, "%s\n", _ ("Warning: cannot load codepages list"));
 219 
 220         codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
 221         g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _ ("7-bit ASCII")));
 222     }
 223 }
 224 
 225 /* --------------------------------------------------------------------------------------------- */
 226 
 227 void
 228 free_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 229 {
 230     g_ptr_array_free (codepages, TRUE);
 231     // NULL-ize pointer to make unit tests happy
 232     codepages = NULL;
 233 }
 234 
 235 /* --------------------------------------------------------------------------------------------- */
 236 
 237 const char *
 238 get_codepage_id (const int n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 239 {
 240     return (n < 0) ? NO_TRANSLATION : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
 241 }
 242 
 243 /* --------------------------------------------------------------------------------------------- */
 244 
 245 int
 246 get_codepage_index (const char *id)
     /* [previous][next][first][last][top][bottom][index][help]  */
 247 {
 248     if (codepages == NULL)
 249         return -1;
 250     if (strcmp (id, NO_TRANSLATION) == 0)
 251         return -1;
 252     for (guint i = 0; i < codepages->len; i++)
 253         if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
 254             return (int) i;
 255     return -1;
 256 }
 257 
 258 /* --------------------------------------------------------------------------------------------- */
 259 /** Check if specified encoding can be used in mc.
 260  * @param encoding name of encoding
 261  * @return TRUE if encoding is supported by mc, FALSE otherwise
 262  */
 263 
 264 gboolean
 265 is_supported_encoding (const char *encoding)
     /* [previous][next][first][last][top][bottom][index][help]  */
 266 {
 267     GIConv coder;
 268     gboolean result;
 269 
 270     if (encoding == NULL)
 271         return FALSE;
 272 
 273     coder = str_crt_conv_from (encoding);
 274     result = coder != INVALID_CONV;
 275     if (result)
 276         str_close_conv (coder);
 277     return result;
 278 }
 279 
 280 /* --------------------------------------------------------------------------------------------- */
 281 
 282 char *
 283 init_translation_table (int cpsource, int cpdisplay)
     /* [previous][next][first][last][top][bottom][index][help]  */
 284 {
 285     int i;
 286     GIConv cd;
 287 
 288     // Fill input <-> display tables
 289 
 290     if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
 291     {
 292         for (i = 0; i <= 255; ++i)
 293         {
 294             conv_displ[i] = i;
 295             conv_input[i] = i;
 296         }
 297         cp_source = cp_display;
 298         return NULL;
 299     }
 300 
 301     for (i = 0; i <= 127; ++i)
 302     {
 303         conv_displ[i] = i;
 304         conv_input[i] = i;
 305     }
 306     cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
 307     cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
 308 
 309     // display <- inpit table
 310 
 311     cd = g_iconv_open (cp_display, cp_source);
 312     if (cd == INVALID_CONV)
 313         return g_strdup_printf (_ ("Cannot translate from %s to %s"), cp_source, cp_display);
 314 
 315     for (i = 128; i <= 255; ++i)
 316         conv_displ[i] = translate_character (cd, i);
 317 
 318     g_iconv_close (cd);
 319 
 320     // inpit <- display table
 321 
 322     cd = g_iconv_open (cp_source, cp_display);
 323     if (cd == INVALID_CONV)
 324         return g_strdup_printf (_ ("Cannot translate from %s to %s"), cp_display, cp_source);
 325 
 326     for (i = 128; i <= 255; ++i)
 327     {
 328         unsigned char ch;
 329         ch = translate_character (cd, i);
 330         conv_input[i] = (ch == UNKNCHAR) ? i : ch;
 331     }
 332 
 333     g_iconv_close (cd);
 334 
 335     return NULL;
 336 }
 337 
 338 /* --------------------------------------------------------------------------------------------- */
 339 
 340 void
 341 convert_to_display (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 342 {
 343     if (str != NULL)
 344         for (; *str != '\0'; str++)
 345             *str = conv_displ[(unsigned char) *str];
 346 }
 347 
 348 /* --------------------------------------------------------------------------------------------- */
 349 
 350 GString *
 351 str_nconvert_to_display (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 352 {
 353     GString *buff;
 354     GIConv conv;
 355 
 356     if (str == NULL)
 357         return NULL;
 358 
 359     if (cp_display == cp_source)
 360         return g_string_new (str);
 361 
 362     conv = str_crt_conv_from (cp_source);
 363     if (conv == INVALID_CONV)
 364         return g_string_new (str);
 365 
 366     buff = g_string_new ("");
 367     str_nconvert (conv, str, len, buff);
 368     str_close_conv (conv);
 369     return buff;
 370 }
 371 
 372 /* --------------------------------------------------------------------------------------------- */
 373 
 374 void
 375 convert_from_input (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 376 {
 377     if (str != NULL)
 378         for (; *str != '\0'; str++)
 379             *str = conv_input[(unsigned char) *str];
 380 }
 381 
 382 /* --------------------------------------------------------------------------------------------- */
 383 
 384 GString *
 385 str_nconvert_to_input (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 386 {
 387     GString *buff;
 388     GIConv conv;
 389 
 390     if (str == NULL)
 391         return NULL;
 392 
 393     if (cp_display == cp_source)
 394         return g_string_new (str);
 395 
 396     conv = str_crt_conv_to (cp_source);
 397     if (conv == INVALID_CONV)
 398         return g_string_new (str);
 399 
 400     buff = g_string_new ("");
 401     str_nconvert (conv, str, len, buff);
 402     str_close_conv (conv);
 403     return buff;
 404 }
 405 
 406 /* --------------------------------------------------------------------------------------------- */
 407 
 408 unsigned char
 409 convert_from_utf_to_current (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 410 {
 411     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 412     unsigned char ch = '.';
 413     GIConv conv;
 414     const char *cp_to;
 415 
 416     if (str == NULL)
 417         return '.';
 418 
 419     cp_to = get_codepage_id (mc_global.source_codepage);
 420     conv = str_crt_conv_to (cp_to);
 421 
 422     if (conv != INVALID_CONV)
 423     {
 424         switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
 425         {
 426         case ESTR_SUCCESS:
 427             ch = buf_ch[0];
 428             break;
 429         case ESTR_PROBLEM:
 430         case ESTR_FAILURE:
 431             ch = '.';
 432             break;
 433         default:
 434             break;
 435         }
 436         str_close_conv (conv);
 437     }
 438 
 439     return ch;
 440 }
 441 
 442 /* --------------------------------------------------------------------------------------------- */
 443 
 444 unsigned char
 445 convert_from_utf_to_current_c (int input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 446 {
 447     unsigned char str[UTF8_CHAR_LEN + 1];
 448     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 449     unsigned char ch = '.';
 450     int res;
 451 
 452     res = g_unichar_to_utf8 (input_char, (char *) str);
 453     if (res == 0)
 454         return ch;
 455 
 456     str[res] = '\0';
 457 
 458     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 459     {
 460     case ESTR_SUCCESS:
 461         ch = buf_ch[0];
 462         break;
 463     case ESTR_PROBLEM:
 464     case ESTR_FAILURE:
 465         ch = '.';
 466         break;
 467     default:
 468         break;
 469     }
 470 
 471     return ch;
 472 }
 473 
 474 /* --------------------------------------------------------------------------------------------- */
 475 
 476 int
 477 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 478 {
 479     unsigned char str[2];
 480     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 481     int ch;
 482 
 483     str[0] = (unsigned char) input_char;
 484     str[1] = '\0';
 485 
 486     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 487     {
 488     case ESTR_SUCCESS:
 489     {
 490         int res;
 491 
 492         res = g_utf8_get_char_validated ((char *) buf_ch, -1);
 493         ch = res >= 0 ? res : buf_ch[0];
 494         break;
 495     }
 496     case ESTR_PROBLEM:
 497     case ESTR_FAILURE:
 498     default:
 499         ch = '.';
 500         break;
 501     }
 502 
 503     return ch;
 504 }
 505 
 506 /* --------------------------------------------------------------------------------------------- */
 507 
 508 int
 509 convert_from_8bit_to_utf_c2 (char input_char)
     /* [previous][next][first][last][top][bottom][index][help]  */
 510 {
 511     int ch = '.';
 512     GIConv conv;
 513     const char *cp_from;
 514 
 515     cp_from = get_codepage_id (mc_global.source_codepage);
 516 
 517     conv = str_crt_conv_to (cp_from);
 518     if (conv != INVALID_CONV)
 519     {
 520         ch = convert_from_8bit_to_utf_c (input_char, conv);
 521         str_close_conv (conv);
 522     }
 523 
 524     return ch;
 525 }
 526 
 527 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */