root/lib/charsets.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. new_codepage_desc
  2. free_codepage_desc
  3. load_codepages_list_from_file
  4. translate_character
  5. load_codepages_list
  6. free_codepages_list
  7. get_codepage_id
  8. get_codepage_index
  9. is_supported_encoding
  10. init_translation_table
  11. convert_to_display
  12. str_nconvert_to_display
  13. convert_from_input
  14. str_nconvert_to_input
  15. convert_from_utf_to_current
  16. convert_from_utf_to_current_c
  17. convert_from_8bit_to_utf_c
  18. convert_from_8bit_to_utf_c2

   1 /*
   2    Text conversion from one charset to another.
   3 
   4    Copyright (C) 2001-2024
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Walery Studennikov <despair@sama.ru>
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25 
  26 /** \file charsets.c
  27  *  \brief Source: Text conversion from one charset to another
  28  */
  29 
  30 #include <config.h>
  31 
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 
  36 #include "lib/global.h"
  37 #include "lib/strutil.h"        /* utf-8 functions */
  38 #include "lib/fileloc.h"
  39 #include "lib/util.h"           /* whitespace() */
  40 
  41 #include "lib/charsets.h"
  42 
  43 /*** global variables ****************************************************************************/
  44 
  /* List of known codepages; elements are codepage_desc. Stays NULL until
     load_codepages_list() fills it and is reset to NULL by free_codepages_list(). */
  45 GPtrArray *codepages = NULL;
  46 
  /* Byte-to-byte translation tables filled by init_translation_table():
     conv_displ is applied by convert_to_display(), conv_input by convert_from_input(). */
  47 unsigned char conv_displ[256];
  48 unsigned char conv_input[256];
  49 
  /* Ids of the display and source codepages as chosen by init_translation_table().
     str_nconvert_to_display()/str_nconvert_to_input() compare these two by pointer. */
  50 const char *cp_display = NULL;
  51 const char *cp_source = NULL;
  52 
  53 /*** file scope macro definitions ****************************************************************/
  54 
  /* Value returned by translate_character() when a byte cannot be converted */
  55 #define UNKNCHAR '\001'
  56 
  /* Codepage id reported by get_codepage_id() for a negative index;
     get_codepage_index() maps this id back to -1 */
  57 #define OTHER_8BIT "Other_8_bit"
  58 
  59 /*** file scope type declarations ****************************************************************/
  60 
  61 /*** forward declarations (file scope functions) *************************************************/
  62 
  63 /*** file scope variables ************************************************************************/
  64 
  65 /* --------------------------------------------------------------------------------------------- */
  66 /*** file scope functions ************************************************************************/
  67 /* --------------------------------------------------------------------------------------------- */
  68 
  69 static codepage_desc *
  70 new_codepage_desc (const char *id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
  71 {
  72     codepage_desc *desc;
  73 
  74     desc = g_new (codepage_desc, 1);
  75     desc->id = g_strdup (id);
  76     desc->name = g_strdup (name);
  77 
  78     return desc;
  79 }
  80 
  81 /* --------------------------------------------------------------------------------------------- */
  82 
  83 static void
  84 free_codepage_desc (gpointer data)
     /* [previous][next][first][last][top][bottom][index][help]  */
  85 {
  86     codepage_desc *desc = (codepage_desc *) data;
  87 
  88     g_free (desc->id);
  89     g_free (desc->name);
  90     g_free (desc);
  91 }
  92 
  93 /* --------------------------------------------------------------------------------------------- */
  94 /* returns display codepage */
  95 
  96 static void
  97 load_codepages_list_from_file (GPtrArray **list, const char *fname)
     /* [previous][next][first][last][top][bottom][index][help]  */
  98 {
  99     FILE *f;
 100     char buf[BUF_MEDIUM];
 101     char *default_codepage = NULL;
 102 
 103     f = fopen (fname, "r");
 104     if (f == NULL)
 105         return;
 106 
 107     while (fgets (buf, sizeof buf, f) != NULL)
 108     {
 109         /* split string into id and cpname */
 110         char *p = buf;
 111         size_t buflen;
 112 
 113         if (*p == '\n' || *p == '\0' || *p == '#')
 114             continue;
 115 
 116         buflen = strlen (buf);
 117 
 118         if (buflen != 0 && buf[buflen - 1] == '\n')
 119             buf[buflen - 1] = '\0';
 120         while (*p != '\0' && !whitespace (*p))
 121             ++p;
 122         if (*p == '\0')
 123             goto fail;
 124 
 125         *p++ = '\0';
 126         g_strstrip (p);
 127         if (*p == '\0')
 128             goto fail;
 129 
 130         if (strcmp (buf, "default") == 0)
 131             default_codepage = g_strdup (p);
 132         else
 133         {
 134             const char *id = buf;
 135 
 136             if (*list == NULL)
 137             {
 138                 *list = g_ptr_array_new_full (16, free_codepage_desc);
 139                 g_ptr_array_add (*list, new_codepage_desc (id, p));
 140             }
 141             else
 142             {
 143                 unsigned int i;
 144 
 145                 /* whether id is already present in list */
 146                 /* if yes, overwrite description */
 147                 for (i = 0; i < (*list)->len; i++)
 148                 {
 149                     codepage_desc *desc;
 150 
 151                     desc = (codepage_desc *) g_ptr_array_index (*list, i);
 152 
 153                     if (strcmp (id, desc->id) == 0)
 154                     {
 155                         /* found */
 156                         g_free (desc->name);
 157                         desc->name = g_strdup (p);
 158                         break;
 159                     }
 160                 }
 161 
 162                 /* not found */
 163                 if (i == (*list)->len)
 164                     g_ptr_array_add (*list, new_codepage_desc (id, p));
 165             }
 166         }
 167     }
 168 
 169     if (default_codepage != NULL)
 170     {
 171         mc_global.display_codepage = get_codepage_index (default_codepage);
 172         g_free (default_codepage);
 173     }
 174 
 175   fail:
 176     fclose (f);
 177 }
 178 
 179 /* --------------------------------------------------------------------------------------------- */
 180 
 181 static char
 182 translate_character (GIConv cd, char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 183 {
 184     gchar *tmp_buff = NULL;
 185     gsize bytes_read, bytes_written = 0;
 186     const char *ibuf = &c;
 187     char ch = UNKNCHAR;
 188     int ibuflen = 1;
 189 
 190     tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
 191     if (tmp_buff != NULL)
 192         ch = tmp_buff[0];
 193     g_free (tmp_buff);
 194     return ch;
 195 }
 196 
 197 /* --------------------------------------------------------------------------------------------- */
 198 /*** public functions ****************************************************************************/
 199 /* --------------------------------------------------------------------------------------------- */
 200 
 201 void
 202 load_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 203 {
 204     char *fname;
 205 
 206     /* 1: try load /usr/share/mc/mc.charsets */
 207     fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
 208     load_codepages_list_from_file (&codepages, fname);
 209     g_free (fname);
 210 
 211     /* 2: try load /etc/mc/mc.charsets */
 212     fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
 213     load_codepages_list_from_file (&codepages, fname);
 214     g_free (fname);
 215 
 216     if (codepages == NULL)
 217     {
 218         /* files are not found, add default codepage */
 219         fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
 220 
 221         codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
 222         g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
 223     }
 224 }
 225 
 226 /* --------------------------------------------------------------------------------------------- */
 227 
 228 void
 229 free_codepages_list (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 230 {
 231     g_ptr_array_free (codepages, TRUE);
 232     /* NULL-ize pointer to make unit tests happy */
 233     codepages = NULL;
 234 }
 235 
 236 /* --------------------------------------------------------------------------------------------- */
 237 
 238 const char *
 239 get_codepage_id (const int n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 240 {
 241     return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
 242 }
 243 
 244 /* --------------------------------------------------------------------------------------------- */
 245 
 246 int
 247 get_codepage_index (const char *id)
     /* [previous][next][first][last][top][bottom][index][help]  */
 248 {
 249     size_t i;
 250 
 251     if (codepages == NULL)
 252         return -1;
 253     if (strcmp (id, OTHER_8BIT) == 0)
 254         return -1;
 255     for (i = 0; i < codepages->len; i++)
 256         if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
 257             return i;
 258     return -1;
 259 }
 260 
 261 /* --------------------------------------------------------------------------------------------- */
 262 /** Check if specified encoding can be used in mc.
 263  * @param encoding name of encoding
 264  * @return TRUE if encoding is supported by mc, FALSE otherwise
 265  */
 266 
 267 gboolean
 268 is_supported_encoding (const char *encoding)
     /* [previous][next][first][last][top][bottom][index][help]  */
 269 {
 270     GIConv coder;
 271     gboolean result;
 272 
 273     if (encoding == NULL)
 274         return FALSE;
 275 
 276     coder = str_crt_conv_from (encoding);
 277     result = coder != INVALID_CONV;
 278     if (result)
 279         str_close_conv (coder);
 280     return result;
 281 }
 282 
 283 /* --------------------------------------------------------------------------------------------- */
 284 
 285 char *
 286 init_translation_table (int cpsource, int cpdisplay)
     /* [previous][next][first][last][top][bottom][index][help]  */
 287 {
 288     int i;
 289     GIConv cd;
 290 
 291     /* Fill input <-> display tables */
 292 
 293     if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
 294     {
 295         for (i = 0; i <= 255; ++i)
 296         {
 297             conv_displ[i] = i;
 298             conv_input[i] = i;
 299         }
 300         cp_source = cp_display;
 301         return NULL;
 302     }
 303 
 304     for (i = 0; i <= 127; ++i)
 305     {
 306         conv_displ[i] = i;
 307         conv_input[i] = i;
 308     }
 309     cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
 310     cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
 311 
 312     /* display <- inpit table */
 313 
 314     cd = g_iconv_open (cp_display, cp_source);
 315     if (cd == INVALID_CONV)
 316         return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
 317 
 318     for (i = 128; i <= 255; ++i)
 319         conv_displ[i] = translate_character (cd, i);
 320 
 321     g_iconv_close (cd);
 322 
 323     /* inpit <- display table */
 324 
 325     cd = g_iconv_open (cp_source, cp_display);
 326     if (cd == INVALID_CONV)
 327         return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
 328 
 329     for (i = 128; i <= 255; ++i)
 330     {
 331         unsigned char ch;
 332         ch = translate_character (cd, i);
 333         conv_input[i] = (ch == UNKNCHAR) ? i : ch;
 334     }
 335 
 336     g_iconv_close (cd);
 337 
 338     return NULL;
 339 }
 340 
 341 /* --------------------------------------------------------------------------------------------- */
 342 
 343 void
 344 convert_to_display (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 345 {
 346     if (str != NULL)
 347         for (; *str != '\0'; str++)
 348             *str = conv_displ[(unsigned char) *str];
 349 }
 350 
 351 /* --------------------------------------------------------------------------------------------- */
 352 
 353 GString *
 354 str_nconvert_to_display (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 355 {
 356     GString *buff;
 357     GIConv conv;
 358 
 359     if (str == NULL)
 360         return NULL;
 361 
 362     if (cp_display == cp_source)
 363         return g_string_new (str);
 364 
 365     conv = str_crt_conv_from (cp_source);
 366     if (conv == INVALID_CONV)
 367         return g_string_new (str);
 368 
 369     buff = g_string_new ("");
 370     str_nconvert (conv, str, len, buff);
 371     str_close_conv (conv);
 372     return buff;
 373 }
 374 
 375 /* --------------------------------------------------------------------------------------------- */
 376 
 377 void
 378 convert_from_input (char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 379 {
 380     if (str != NULL)
 381         for (; *str != '\0'; str++)
 382             *str = conv_input[(unsigned char) *str];
 383 }
 384 
 385 /* --------------------------------------------------------------------------------------------- */
 386 
 387 GString *
 388 str_nconvert_to_input (const char *str, int len)
     /* [previous][next][first][last][top][bottom][index][help]  */
 389 {
 390     GString *buff;
 391     GIConv conv;
 392 
 393     if (str == NULL)
 394         return NULL;
 395 
 396     if (cp_display == cp_source)
 397         return g_string_new (str);
 398 
 399     conv = str_crt_conv_to (cp_source);
 400     if (conv == INVALID_CONV)
 401         return g_string_new (str);
 402 
 403     buff = g_string_new ("");
 404     str_nconvert (conv, str, len, buff);
 405     str_close_conv (conv);
 406     return buff;
 407 }
 408 
 409 /* --------------------------------------------------------------------------------------------- */
 410 
 411 unsigned char
 412 convert_from_utf_to_current (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 413 {
 414     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 415     unsigned char ch = '.';
 416     GIConv conv;
 417     const char *cp_to;
 418 
 419     if (str == NULL)
 420         return '.';
 421 
 422     cp_to = get_codepage_id (mc_global.source_codepage);
 423     conv = str_crt_conv_to (cp_to);
 424 
 425     if (conv != INVALID_CONV)
 426     {
 427         switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
 428         {
 429         case ESTR_SUCCESS:
 430             ch = buf_ch[0];
 431             break;
 432         case ESTR_PROBLEM:
 433         case ESTR_FAILURE:
 434             ch = '.';
 435             break;
 436         default:
 437             break;
 438         }
 439         str_close_conv (conv);
 440     }
 441 
 442     return ch;
 443 }
 444 
 445 /* --------------------------------------------------------------------------------------------- */
 446 
 447 unsigned char
 448 convert_from_utf_to_current_c (int input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 449 {
 450     unsigned char str[UTF8_CHAR_LEN + 1];
 451     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 452     unsigned char ch = '.';
 453     int res;
 454 
 455     res = g_unichar_to_utf8 (input_char, (char *) str);
 456     if (res == 0)
 457         return ch;
 458 
 459     str[res] = '\0';
 460 
 461     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 462     {
 463     case ESTR_SUCCESS:
 464         ch = buf_ch[0];
 465         break;
 466     case ESTR_PROBLEM:
 467     case ESTR_FAILURE:
 468         ch = '.';
 469         break;
 470     default:
 471         break;
 472     }
 473 
 474     return ch;
 475 }
 476 
 477 /* --------------------------------------------------------------------------------------------- */
 478 
 479 int
 480 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
     /* [previous][next][first][last][top][bottom][index][help]  */
 481 {
 482     unsigned char str[2];
 483     unsigned char buf_ch[UTF8_CHAR_LEN + 1];
 484     int ch;
 485 
 486     str[0] = (unsigned char) input_char;
 487     str[1] = '\0';
 488 
 489     switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
 490     {
 491     case ESTR_SUCCESS:
 492         {
 493             int res;
 494 
 495             res = g_utf8_get_char_validated ((char *) buf_ch, -1);
 496             ch = res >= 0 ? res : buf_ch[0];
 497             break;
 498         }
 499     case ESTR_PROBLEM:
 500     case ESTR_FAILURE:
 501     default:
 502         ch = '.';
 503         break;
 504     }
 505 
 506     return ch;
 507 }
 508 
 509 /* --------------------------------------------------------------------------------------------- */
 510 
 511 int
 512 convert_from_8bit_to_utf_c2 (char input_char)
     /* [previous][next][first][last][top][bottom][index][help]  */
 513 {
 514     int ch = '.';
 515     GIConv conv;
 516     const char *cp_from;
 517 
 518     cp_from = get_codepage_id (mc_global.source_codepage);
 519 
 520     conv = str_crt_conv_to (cp_from);
 521     if (conv != INVALID_CONV)
 522     {
 523         ch = convert_from_8bit_to_utf_c (input_char, conv);
 524         str_close_conv (conv);
 525     }
 526 
 527     return ch;
 528 }
 529 
 530 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */