Manual pages: mc, mcdiff, mcedit, mcview

root/lib/strutil/strutilutf8.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. str_unichar_iscombiningmark
  2. str_utf8_insert_replace_char
  3. str_utf8_is_valid_string
  4. str_utf8_is_valid_char
  5. str_utf8_cnext_char
  6. str_utf8_cprev_char
  7. str_utf8_cnext_char_safe
  8. str_utf8_cprev_char_safe
  9. str_utf8_fix_string
  10. str_utf8_isspace
  11. str_utf8_ispunct
  12. str_utf8_isalnum
  13. str_utf8_isdigit
  14. str_utf8_isprint
  15. str_utf8_iscombiningmark
  16. str_utf8_cnext_noncomb_char
  17. str_utf8_cprev_noncomb_char
  18. str_utf8_toupper
  19. str_utf8_tolower
  20. str_utf8_length
  21. str_utf8_length2
  22. str_utf8_length_noncomb
  23. str_utf8_questmark_sustb
  24. str_utf8_conv_gerror_message
  25. str_utf8_vfs_convert_to
  26. str_utf8_make_make_term_form
  27. str_utf8_term_form
  28. utf8_tool_copy_chars_to_end
  29. utf8_tool_copy_chars_to
  30. utf8_tool_insert_space
  31. utf8_tool_insert_char
  32. utf8_tool_skip_chars_to
  33. utf8_tool_compose
  34. str_utf8_fit_to_term
  35. str_utf8_term_trim
  36. str_utf8_term_width2
  37. str_utf8_term_width1
  38. str_utf8_term_char_width
  39. str_utf8_term_substring
  40. str_utf8_trunc
  41. str_utf8_offset_to_pos
  42. str_utf8_column_to_pos
  43. str_utf8_create_search_needle
  44. str_utf8_release_search_needle
  45. str_utf8_search_first
  46. str_utf8_search_last
  47. str_utf8_normalize
  48. str_utf8_casefold_normalize
  49. str_utf8_compare
  50. str_utf8_ncompare
  51. str_utf8_casecmp
  52. str_utf8_ncasecmp
  53. str_utf8_prefix
  54. str_utf8_caseprefix
  55. str_utf8_create_key_gen
  56. str_utf8_create_key
  57. str_utf8_create_key_for_filename
  58. str_utf8_key_collate
  59. str_utf8_release_key
  60. str_utf8_init

   1 /*
   2    UTF-8 strings utilities
   3 
   4    Copyright (C) 2007-2025
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Rostislav Benes, 2007
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25 
  26 #include <config.h>
  27 
  28 #include <stdlib.h>
  29 #include <langinfo.h>
  30 #include <limits.h>  // MB_LEN_MAX, SIZE_MAX
  31 #include <string.h>
  32 
  33 #include "lib/global.h"
  34 #include "lib/strutil.h"
  35 
  36 /* using function for utf-8 from glib */
  37 
  38 /*** global variables ****************************************************************************/
  39 
  40 /*** file scope macro definitions ****************************************************************/
  41 
  42 /*** file scope type declarations ****************************************************************/
  43 
/* Cursor state shared by the utf8_tool_* helpers while they build an output string. */
struct utf8_tool
{
    char *actual;         // write position in the output buffer
    size_t remain;        // bytes still free at 'actual'
    const char *checked;  // read position in the source text
    int ident;            // column counter in terminal cells; callers may start it below zero
    gboolean compose;     // set to TRUE by the copy helpers when they copy a combining mark
};
  52 
/* Result record filled in by str_utf8_make_make_term_form(). */
struct term_form
{
    char text[BUF_MEDIUM * MB_LEN_MAX];  // converted, NUL-terminated text
    size_t width;                        // accumulated width; wide characters count twice
    gboolean compose;                    // TRUE when 'text' holds a combining mark
};
  59 
  60 /*** forward declarations (file scope functions) *************************************************/
  61 
  62 /*** file scope variables ************************************************************************/
  63 
/* bytes EF BF BD, the UTF-8 encoding of U+FFFD; written in place of undecodable input */
static const char replch[] = "\xEF\xBF\xBD";
  65 
  66 /* --------------------------------------------------------------------------------------------- */
  67 /*** file scope functions ************************************************************************/
  68 /* --------------------------------------------------------------------------------------------- */
  69 
  70 static gboolean
  71 str_unichar_iscombiningmark (gunichar uni)
     /* [previous][next][first][last][top][bottom][index][help]  */
  72 {
  73     GUnicodeType type;
  74 
  75     type = g_unichar_type (uni);
  76     return (type == G_UNICODE_SPACING_MARK) || (type == G_UNICODE_ENCLOSING_MARK)
  77         || (type == G_UNICODE_NON_SPACING_MARK);
  78 }
  79 
  80 /* --------------------------------------------------------------------------------------------- */
  81 
  82 static void
  83 str_utf8_insert_replace_char (GString *buffer)
     /* [previous][next][first][last][top][bottom][index][help]  */
  84 {
  85     g_string_append (buffer, replch);
  86 }
  87 
  88 /* --------------------------------------------------------------------------------------------- */
  89 
  90 static gboolean
  91 str_utf8_is_valid_string (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
  92 {
  93     return g_utf8_validate (text, -1, NULL);
  94 }
  95 
  96 /* --------------------------------------------------------------------------------------------- */
  97 
  98 static int
  99 str_utf8_is_valid_char (const char *ch, size_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 100 {
 101     switch (g_utf8_get_char_validated (ch, size))
 102     {
 103     case (gunichar) (-2):
 104         return (-2);
 105     case (gunichar) (-1):
 106         return (-1);
 107     default:
 108         return 1;
 109     }
 110 }
 111 
 112 /* --------------------------------------------------------------------------------------------- */
 113 
 114 static void
 115 str_utf8_cnext_char (const char **text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 116 {
 117     (*text) = g_utf8_next_char (*text);
 118 }
 119 
 120 /* --------------------------------------------------------------------------------------------- */
 121 
 122 static void
 123 str_utf8_cprev_char (const char **text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 124 {
 125     (*text) = g_utf8_prev_char (*text);
 126 }
 127 
 128 /* --------------------------------------------------------------------------------------------- */
 129 
 130 static void
 131 str_utf8_cnext_char_safe (const char **text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 132 {
 133     if (str_utf8_is_valid_char (*text, -1) == 1)
 134         (*text) = g_utf8_next_char (*text);
 135     else
 136         (*text)++;
 137 }
 138 
 139 /* --------------------------------------------------------------------------------------------- */
 140 
 141 static void
 142 str_utf8_cprev_char_safe (const char **text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 143 {
 144     const char *result, *t;
 145 
 146     result = g_utf8_prev_char (*text);
 147     t = result;
 148     str_utf8_cnext_char_safe (&t);
 149     if (t == *text)
 150         (*text) = result;
 151     else
 152         (*text)--;
 153 }
 154 
 155 /* --------------------------------------------------------------------------------------------- */
 156 
 157 static void
 158 str_utf8_fix_string (char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 159 {
 160     while (text[0] != '\0')
 161     {
 162         gunichar uni;
 163 
 164         uni = g_utf8_get_char_validated (text, -1);
 165         if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
 166             text = g_utf8_next_char (text);
 167         else
 168         {
 169             text[0] = '?';
 170             text++;
 171         }
 172     }
 173 }
 174 
 175 /* --------------------------------------------------------------------------------------------- */
 176 
 177 static gboolean
 178 str_utf8_isspace (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 179 {
 180     gunichar uni;
 181 
 182     uni = g_utf8_get_char_validated (text, -1);
 183     return g_unichar_isspace (uni);
 184 }
 185 
 186 /* --------------------------------------------------------------------------------------------- */
 187 
 188 static gboolean
 189 str_utf8_ispunct (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 190 {
 191     gunichar uni;
 192 
 193     uni = g_utf8_get_char_validated (text, -1);
 194     return g_unichar_ispunct (uni);
 195 }
 196 
 197 /* --------------------------------------------------------------------------------------------- */
 198 
 199 static gboolean
 200 str_utf8_isalnum (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 201 {
 202     gunichar uni;
 203 
 204     uni = g_utf8_get_char_validated (text, -1);
 205     return g_unichar_isalnum (uni);
 206 }
 207 
 208 /* --------------------------------------------------------------------------------------------- */
 209 
 210 static gboolean
 211 str_utf8_isdigit (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 212 {
 213     gunichar uni;
 214 
 215     uni = g_utf8_get_char_validated (text, -1);
 216     return g_unichar_isdigit (uni);
 217 }
 218 
 219 /* --------------------------------------------------------------------------------------------- */
 220 
 221 static gboolean
 222 str_utf8_isprint (const char *ch)
     /* [previous][next][first][last][top][bottom][index][help]  */
 223 {
 224     gunichar uni;
 225 
 226     uni = g_utf8_get_char_validated (ch, -1);
 227     return g_unichar_isprint (uni);
 228 }
 229 
 230 /* --------------------------------------------------------------------------------------------- */
 231 
 232 static gboolean
 233 str_utf8_iscombiningmark (const char *ch)
     /* [previous][next][first][last][top][bottom][index][help]  */
 234 {
 235     gunichar uni;
 236 
 237     uni = g_utf8_get_char_validated (ch, -1);
 238     return str_unichar_iscombiningmark (uni);
 239 }
 240 
 241 /* --------------------------------------------------------------------------------------------- */
 242 
 243 static int
 244 str_utf8_cnext_noncomb_char (const char **text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 245 {
 246     int count = 0;
 247 
 248     while ((*text)[0] != '\0')
 249     {
 250         str_utf8_cnext_char_safe (text);
 251         count++;
 252         if (!str_utf8_iscombiningmark (*text))
 253             break;
 254     }
 255 
 256     return count;
 257 }
 258 
 259 /* --------------------------------------------------------------------------------------------- */
 260 
 261 static int
 262 str_utf8_cprev_noncomb_char (const char **text, const char *begin)
     /* [previous][next][first][last][top][bottom][index][help]  */
 263 {
 264     int count = 0;
 265 
 266     while ((*text) != begin)
 267     {
 268         str_utf8_cprev_char_safe (text);
 269         count++;
 270         if (!str_utf8_iscombiningmark (*text))
 271             break;
 272     }
 273 
 274     return count;
 275 }
 276 
 277 /* --------------------------------------------------------------------------------------------- */
 278 
 279 static gboolean
 280 str_utf8_toupper (const char *text, char **out, size_t *remain)
     /* [previous][next][first][last][top][bottom][index][help]  */
 281 {
 282     gunichar uni;
 283     size_t left;
 284 
 285     uni = g_utf8_get_char_validated (text, -1);
 286     if (uni == (gunichar) (-1) || uni == (gunichar) (-2))
 287         return FALSE;
 288 
 289     uni = g_unichar_toupper (uni);
 290     left = g_unichar_to_utf8 (uni, NULL);
 291     if (left >= *remain)
 292         return FALSE;
 293 
 294     left = g_unichar_to_utf8 (uni, *out);
 295     (*out) += left;
 296     (*remain) -= left;
 297     return TRUE;
 298 }
 299 
 300 /* --------------------------------------------------------------------------------------------- */
 301 
 302 static gboolean
 303 str_utf8_tolower (const char *text, char **out, size_t *remain)
     /* [previous][next][first][last][top][bottom][index][help]  */
 304 {
 305     gunichar uni;
 306     size_t left;
 307 
 308     uni = g_utf8_get_char_validated (text, -1);
 309     if (uni == (gunichar) (-1) || uni == (gunichar) (-2))
 310         return FALSE;
 311 
 312     uni = g_unichar_tolower (uni);
 313     left = g_unichar_to_utf8 (uni, NULL);
 314     if (left >= *remain)
 315         return FALSE;
 316 
 317     left = g_unichar_to_utf8 (uni, *out);
 318     (*out) += left;
 319     (*remain) -= left;
 320     return TRUE;
 321 }
 322 
 323 /* --------------------------------------------------------------------------------------------- */
 324 
 325 static int
 326 str_utf8_length (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 327 {
 328     int result = 0;
 329     const char *start;
 330     const char *end;
 331 
 332     start = text;
 333     while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
 334     {
 335         if (start != end)
 336             result += g_utf8_strlen (start, end - start);
 337 
 338         result++;
 339         start = end + 1;
 340     }
 341 
 342     if (start == text)
 343         result = g_utf8_strlen (text, -1);
 344     else if (start[0] != '\0' && start != end)
 345         result += g_utf8_strlen (start, end - start);
 346 
 347     return result;
 348 }
 349 
 350 /* --------------------------------------------------------------------------------------------- */
 351 
/* Count the characters of TEXT while spending a budget of SIZE bytes.
 * Every byte at which g_utf8_validate() stops is counted as one character of
 * its own, as long as budget is left. */
static int
str_utf8_length2 (const char *text, int size)
{
    int result = 0;
    const char *start;
    const char *end;

    start = text;
    // each pass handles one offending byte; g_utf8_validate() leaves 'end' on it
    while (!g_utf8_validate (start, -1, &end) && start[0] != '\0' && size > 0)
    {
        if (start != end)
        {
            // valid run before the offending byte, clipped to the remaining budget
            result += g_utf8_strlen (start, MIN (end - start, size));
            size -= end - start;
        }
        // the offending byte is counted only while budget is left
        result += (size > 0);
        size--;
        start = end + 1;
    }

    if (start == text)
        // the loop body never ran: count the text directly with the given limit
        result = g_utf8_strlen (text, size);
    else if (start[0] != '\0' && start != end && size > 0)
        // valid tail after the last offending byte, clipped to the remaining budget
        result += g_utf8_strlen (start, MIN (end - start, size));

    return result;
}
 379 
 380 /* --------------------------------------------------------------------------------------------- */
 381 
 382 static int
 383 str_utf8_length_noncomb (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 384 {
 385     int result = 0;
 386     const char *t = text;
 387 
 388     while (t[0] != '\0')
 389     {
 390         str_utf8_cnext_noncomb_char (&t);
 391         result++;
 392     }
 393 
 394     return result;
 395 }
 396 
 397 /* --------------------------------------------------------------------------------------------- */
 398 
 399 #if 0
 400 static void
 401 str_utf8_questmark_sustb (char **string, size_t *left, GString *buffer)
     /* [previous][next][first][last][top][bottom][index][help]  */
 402 {
 403     char *next;
 404 
 405     next = g_utf8_next_char (*string);
 406     (*left) -= next - (*string);
 407     (*string) = next;
 408     g_string_append_c (buffer, '?');
 409 }
 410 #endif
 411 
 412 /* --------------------------------------------------------------------------------------------- */
 413 
 414 static gchar *
 415 str_utf8_conv_gerror_message (GError *mcerror, const char *def_msg)
     /* [previous][next][first][last][top][bottom][index][help]  */
 416 {
 417     if (mcerror != NULL)
 418         return g_strdup (mcerror->message);
 419 
 420     return g_strdup (def_msg != NULL ? def_msg : "");
 421 }
 422 
 423 /* --------------------------------------------------------------------------------------------- */
 424 
 425 static estr_t
 426 str_utf8_vfs_convert_to (GIConv coder, const char *string, int size, GString *buffer)
     /* [previous][next][first][last][top][bottom][index][help]  */
 427 {
 428     estr_t result = ESTR_SUCCESS;
 429 
 430     if (coder == str_cnv_not_convert)
 431         g_string_append_len (buffer, string, size);
 432     else
 433         result = str_nconvert (coder, string, size, buffer);
 434 
 435     return result;
 436 }
 437 
 438 /* --------------------------------------------------------------------------------------------- */
 439 /* utility function, that makes string valid in utf8 and all characters printable
 440  * return width of string too */
 441 
 442 static const struct term_form *
 443 str_utf8_make_make_term_form (const char *text, const ssize_t width)
     /* [previous][next][first][last][top][bottom][index][help]  */
 444 {
 445     static struct term_form result;
 446     size_t width1;
 447     gunichar uni;
 448     size_t left;
 449     char *actual;
 450 
 451     width1 = width < 0 ? SIZE_MAX : (size_t) width;
 452 
 453     result.text[0] = '\0';
 454     result.width = 0;
 455     result.compose = FALSE;
 456 
 457     if (width1 == 0 || text[0] == '\0')
 458         return &result;
 459 
 460     actual = result.text;
 461 
 462     /* check if text start with combining character,
 463      * add space at begin in this case */
 464     uni = g_utf8_get_char_validated (text, -1);
 465     if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)) && str_unichar_iscombiningmark (uni))
 466     {
 467         actual[0] = ' ';
 468         actual++;
 469         result.width++;
 470         result.compose = TRUE;
 471     }
 472 
 473     for (; width1 != 0 && text[0] != '\0'; width1--)
 474     {
 475         uni = g_utf8_get_char_validated (text, -1);
 476         if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
 477         {
 478             if (g_unichar_isprint (uni))
 479             {
 480                 left = g_unichar_to_utf8 (uni, actual);
 481                 actual += left;
 482                 if (str_unichar_iscombiningmark (uni))
 483                     result.compose = TRUE;
 484                 else
 485                 {
 486                     result.width++;
 487                     if (g_unichar_iswide (uni))
 488                         result.width++;
 489                 }
 490             }
 491             else
 492             {
 493                 actual[0] = '.';
 494                 actual++;
 495                 result.width++;
 496             }
 497             text = g_utf8_next_char (text);
 498         }
 499         else
 500         {
 501             size_t repl_len;
 502 
 503             text++;
 504             // actual[0] = '?';
 505             repl_len = strlen (replch);
 506             memcpy (actual, replch, repl_len);
 507             actual += repl_len;
 508             result.width++;
 509         }
 510     }
 511     actual[0] = '\0';
 512 
 513     return &result;
 514 }
 515 
 516 /* --------------------------------------------------------------------------------------------- */
 517 
 518 static const char *
 519 str_utf8_term_form (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 520 {
 521     static char result[BUF_MEDIUM * MB_LEN_MAX];
 522     const struct term_form *pre_form;
 523 
 524     pre_form = str_utf8_make_make_term_form (text, -1);
 525     if (pre_form->compose)
 526     {
 527         char *composed;
 528 
 529         composed = g_utf8_normalize (pre_form->text, -1, G_NORMALIZE_DEFAULT_COMPOSE);
 530         g_strlcpy (result, composed, sizeof (result));
 531         g_free (composed);
 532     }
 533     else
 534         g_strlcpy (result, pre_form->text, sizeof (result));
 535 
 536     return result;
 537 }
 538 
 539 /* --------------------------------------------------------------------------------------------- */
 540 /* utility function, that copies all characters from checked to actual */
 541 
 542 static gboolean
 543 utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
     /* [previous][next][first][last][top][bottom][index][help]  */
 544 {
 545     tool->compose = FALSE;
 546 
 547     while (tool->checked[0] != '\0')
 548     {
 549         gunichar uni;
 550         size_t left;
 551 
 552         uni = g_utf8_get_char (tool->checked);
 553         tool->compose = tool->compose || str_unichar_iscombiningmark (uni);
 554         left = g_unichar_to_utf8 (uni, NULL);
 555         if (tool->remain <= left)
 556             return FALSE;
 557         left = g_unichar_to_utf8 (uni, tool->actual);
 558         tool->actual += left;
 559         tool->remain -= left;
 560         tool->checked = g_utf8_next_char (tool->checked);
 561     }
 562 
 563     return TRUE;
 564 }
 565 
 566 /* --------------------------------------------------------------------------------------------- */
 567 /* utility function, that copies characters from checked to actual until ident is
 568  * smaller than to_ident */
 569 
 570 static gboolean
 571 utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
     /* [previous][next][first][last][top][bottom][index][help]  */
 572 {
 573     tool->compose = FALSE;
 574 
 575     while (tool->checked[0] != '\0')
 576     {
 577         gunichar uni;
 578         size_t left;
 579         int w = 0;
 580 
 581         uni = g_utf8_get_char (tool->checked);
 582         if (str_unichar_iscombiningmark (uni))
 583             tool->compose = TRUE;
 584         else
 585         {
 586             w = 1;
 587             if (g_unichar_iswide (uni))
 588                 w++;
 589             if (tool->ident + w > to_ident)
 590                 return TRUE;
 591         }
 592 
 593         left = g_unichar_to_utf8 (uni, NULL);
 594         if (tool->remain <= left)
 595             return FALSE;
 596         left = g_unichar_to_utf8 (uni, tool->actual);
 597         tool->actual += left;
 598         tool->remain -= left;
 599         tool->checked = g_utf8_next_char (tool->checked);
 600         tool->ident += w;
 601     }
 602 
 603     return TRUE;
 604 }
 605 
 606 /* --------------------------------------------------------------------------------------------- */
 607 /* utility function, adds count spaces to actual */
 608 
 609 static int
 610 utf8_tool_insert_space (struct utf8_tool *tool, int count)
     /* [previous][next][first][last][top][bottom][index][help]  */
 611 {
 612     if (count <= 0)
 613         return 1;
 614     if (tool->remain <= (gsize) count)
 615         return 0;
 616 
 617     memset (tool->actual, ' ', count);
 618     tool->actual += count;
 619     tool->remain -= count;
 620     return 1;
 621 }
 622 
 623 /* --------------------------------------------------------------------------------------------- */
 624 /* utility function, adds one characters to actual */
 625 
 626 static int
 627 utf8_tool_insert_char (struct utf8_tool *tool, char ch)
     /* [previous][next][first][last][top][bottom][index][help]  */
 628 {
 629     if (tool->remain <= 1)
 630         return 0;
 631 
 632     tool->actual[0] = ch;
 633     tool->actual++;
 634     tool->remain--;
 635     return 1;
 636 }
 637 
 638 /* --------------------------------------------------------------------------------------------- */
 639 /* utility function, thah skips characters from checked until ident is greater or
 640  * equal to to_ident */
 641 
 642 static gboolean
 643 utf8_tool_skip_chars_to (struct utf8_tool *tool, int to_ident)
     /* [previous][next][first][last][top][bottom][index][help]  */
 644 {
 645     gunichar uni;
 646 
 647     while (to_ident > tool->ident && tool->checked[0] != '\0')
 648     {
 649         uni = g_utf8_get_char (tool->checked);
 650         if (!str_unichar_iscombiningmark (uni))
 651         {
 652             tool->ident++;
 653             if (g_unichar_iswide (uni))
 654                 tool->ident++;
 655         }
 656         tool->checked = g_utf8_next_char (tool->checked);
 657     }
 658 
 659     uni = g_utf8_get_char (tool->checked);
 660     while (str_unichar_iscombiningmark (uni))
 661     {
 662         tool->checked = g_utf8_next_char (tool->checked);
 663         uni = g_utf8_get_char (tool->checked);
 664     }
 665 
 666     return TRUE;
 667 }
 668 
 669 /* --------------------------------------------------------------------------------------------- */
 670 
 671 static void
 672 utf8_tool_compose (char *buffer, size_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 673 {
 674     char *composed;
 675 
 676     composed = g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE);
 677     g_strlcpy (buffer, composed, size);
 678     g_free (composed);
 679 }
 680 
 681 /* --------------------------------------------------------------------------------------------- */
 682 
 683 static const char *
 684 str_utf8_fit_to_term (const char *text, int width, align_crt_t just_mode)
     /* [previous][next][first][last][top][bottom][index][help]  */
 685 {
 686     static char result[BUF_MEDIUM * MB_LEN_MAX];
 687     const struct term_form *pre_form;
 688     struct utf8_tool tool;
 689 
 690     pre_form = str_utf8_make_make_term_form (text, -1);
 691 
 692     tool.checked = pre_form->text;
 693     tool.actual = result;
 694     tool.remain = sizeof (result);
 695     tool.compose = FALSE;
 696 
 697     if (pre_form->width <= (gsize) width)
 698     {
 699         switch (HIDE_FIT (just_mode))
 700         {
 701         case J_CENTER_LEFT:
 702         case J_CENTER:
 703             tool.ident = (width - pre_form->width) / 2;
 704             break;
 705         case J_RIGHT:
 706             tool.ident = width - pre_form->width;
 707             break;
 708         default:
 709             tool.ident = 0;
 710             break;
 711         }
 712 
 713         utf8_tool_insert_space (&tool, tool.ident);
 714         utf8_tool_copy_chars_to_end (&tool);
 715         utf8_tool_insert_space (&tool, width - pre_form->width - tool.ident);
 716     }
 717     else if (IS_FIT (just_mode))
 718     {
 719         tool.ident = 0;
 720         utf8_tool_copy_chars_to (&tool, width / 2);
 721         utf8_tool_insert_char (&tool, '~');
 722 
 723         tool.ident = 0;
 724         utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
 725         utf8_tool_copy_chars_to_end (&tool);
 726         utf8_tool_insert_space (&tool, width - (pre_form->width - tool.ident + 1));
 727     }
 728     else
 729     {
 730         switch (HIDE_FIT (just_mode))
 731         {
 732         case J_CENTER:
 733             tool.ident = (width - pre_form->width) / 2;
 734             break;
 735         case J_RIGHT:
 736             tool.ident = width - pre_form->width;
 737             break;
 738         default:
 739             tool.ident = 0;
 740             break;
 741         }
 742 
 743         utf8_tool_skip_chars_to (&tool, 0);
 744         utf8_tool_insert_space (&tool, tool.ident);
 745         utf8_tool_copy_chars_to (&tool, width);
 746         utf8_tool_insert_space (&tool, width - tool.ident);
 747     }
 748 
 749     tool.actual[0] = '\0';
 750     if (tool.compose)
 751         utf8_tool_compose (result, sizeof (result));
 752     return result;
 753 }
 754 
 755 /* --------------------------------------------------------------------------------------------- */
 756 
 757 static const char *
 758 str_utf8_term_trim (const char *text, const ssize_t width)
     /* [previous][next][first][last][top][bottom][index][help]  */
 759 {
 760     static char result[BUF_MEDIUM * MB_LEN_MAX];
 761     const struct term_form *pre_form;
 762     struct utf8_tool tool;
 763 
 764     if (width < 1)
 765     {
 766         result[0] = '\0';
 767         return result;
 768     }
 769 
 770     const size_t width1 = (size_t) width;
 771 
 772     pre_form = str_utf8_make_make_term_form (text, -1);
 773 
 774     tool.checked = pre_form->text;
 775     tool.actual = result;
 776     tool.remain = sizeof (result);
 777     tool.compose = FALSE;
 778 
 779     if (width1 >= pre_form->width)
 780         utf8_tool_copy_chars_to_end (&tool);
 781     else if (width1 <= 3)
 782     {
 783         memset (tool.actual, '.', width1);
 784         tool.actual += width1;
 785         tool.remain -= width1;
 786     }
 787     else
 788     {
 789         memset (tool.actual, '.', 3);
 790         tool.actual += 3;
 791         tool.remain -= 3;
 792 
 793         tool.ident = 0;
 794         utf8_tool_skip_chars_to (&tool, pre_form->width - width1 + 3);
 795         utf8_tool_copy_chars_to_end (&tool);
 796     }
 797 
 798     tool.actual[0] = '\0';
 799     if (tool.compose)
 800         utf8_tool_compose (result, sizeof (result));
 801     return result;
 802 }
 803 
 804 /* --------------------------------------------------------------------------------------------- */
 805 
 806 static size_t
 807 str_utf8_term_width2 (const char *text, const ssize_t width)
     /* [previous][next][first][last][top][bottom][index][help]  */
 808 {
 809     const struct term_form *result = str_utf8_make_make_term_form (text, width);
 810 
 811     return result->width;
 812 }
 813 
 814 /* --------------------------------------------------------------------------------------------- */
 815 
 816 static size_t
 817 str_utf8_term_width1 (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 818 {
 819     return str_utf8_term_width2 (text, -1);
 820 }
 821 
 822 /* --------------------------------------------------------------------------------------------- */
 823 
 824 static int
 825 str_utf8_term_char_width (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
 826 {
 827     gunichar uni;
 828 
 829     uni = g_utf8_get_char_validated (text, -1);
 830     return (str_unichar_iscombiningmark (uni)) ? 0 : ((g_unichar_iswide (uni)) ? 2 : 1);
 831 }
 832 
 833 /* --------------------------------------------------------------------------------------------- */
 834 
 835 static const char *
 836 str_utf8_term_substring (const char *text, int start, int width)
     /* [previous][next][first][last][top][bottom][index][help]  */
 837 {
 838     static char result[BUF_MEDIUM * MB_LEN_MAX];
 839     const struct term_form *pre_form;
 840     struct utf8_tool tool;
 841 
 842     pre_form = str_utf8_make_make_term_form (text, -1);
 843 
 844     tool.checked = pre_form->text;
 845     tool.actual = result;
 846     tool.remain = sizeof (result);
 847     tool.compose = FALSE;
 848 
 849     tool.ident = -start;
 850     utf8_tool_skip_chars_to (&tool, 0);
 851     if (tool.ident < 0)
 852         tool.ident = 0;
 853     utf8_tool_insert_space (&tool, tool.ident);
 854 
 855     utf8_tool_copy_chars_to (&tool, width);
 856     utf8_tool_insert_space (&tool, width - tool.ident);
 857 
 858     tool.actual[0] = '\0';
 859     if (tool.compose)
 860         utf8_tool_compose (result, sizeof (result));
 861     return result;
 862 }
 863 
 864 /* --------------------------------------------------------------------------------------------- */
 865 
 866 static const char *
 867 str_utf8_trunc (const char *text, const ssize_t width)
     /* [previous][next][first][last][top][bottom][index][help]  */
 868 {
 869     static char result[MC_MAXPATHLEN * MB_LEN_MAX * 2];
 870     const struct term_form *pre_form;
 871     struct utf8_tool tool;
 872 
 873     const size_t width1 = width < 0 ? SIZE_MAX : (size_t) width;
 874 
 875     pre_form = str_utf8_make_make_term_form (text, -1);
 876 
 877     tool.checked = pre_form->text;
 878     tool.actual = result;
 879     tool.remain = sizeof (result);
 880     tool.compose = FALSE;
 881 
 882     if (pre_form->width <= width1)
 883         utf8_tool_copy_chars_to_end (&tool);
 884     else
 885     {
 886         tool.ident = 0;
 887         utf8_tool_copy_chars_to (&tool, width1 / 2);
 888         utf8_tool_insert_char (&tool, '~');
 889 
 890         tool.ident = 0;
 891         utf8_tool_skip_chars_to (&tool, pre_form->width - width1 + 1);
 892         utf8_tool_copy_chars_to_end (&tool);
 893     }
 894 
 895     tool.actual[0] = '\0';
 896     if (tool.compose)
 897         utf8_tool_compose (result, sizeof (result));
 898     return result;
 899 }
 900 
 901 /* --------------------------------------------------------------------------------------------- */
 902 
 903 static int
 904 str_utf8_offset_to_pos (const char *text, size_t length)
     /* [previous][next][first][last][top][bottom][index][help]  */
 905 {
 906     if (str_utf8_is_valid_string (text))
 907         return g_utf8_offset_to_pointer (text, length) - text;
 908     else
 909     {
 910         int result;
 911         char *buffer;
 912 
 913         buffer = g_strdup (text);
 914         str_utf8_fix_string (buffer);
 915         result = g_utf8_offset_to_pointer (buffer, length) - buffer;
 916         g_free (buffer);
 917         return result;
 918     }
 919 }
 920 
 921 /* --------------------------------------------------------------------------------------------- */
 922 
 923 static int
 924 str_utf8_column_to_pos (const char *text, size_t pos)
     /* [previous][next][first][last][top][bottom][index][help]  */
 925 {
 926     int result = 0;
 927     int width = 0;
 928 
 929     while (text[0] != '\0')
 930     {
 931         gunichar uni;
 932 
 933         uni = g_utf8_get_char_validated (text, MB_LEN_MAX);
 934         if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
 935         {
 936             if (g_unichar_isprint (uni))
 937             {
 938                 if (!str_unichar_iscombiningmark (uni))
 939                 {
 940                     width++;
 941                     if (g_unichar_iswide (uni))
 942                         width++;
 943                 }
 944             }
 945             else
 946             {
 947                 width++;
 948             }
 949             text = g_utf8_next_char (text);
 950         }
 951         else
 952         {
 953             text++;
 954             width++;
 955         }
 956 
 957         if ((gsize) width > pos)
 958             return result;
 959 
 960         result++;
 961     }
 962 
 963     return result;
 964 }
 965 
 966 /* --------------------------------------------------------------------------------------------- */
 967 
 968 static char *
 969 str_utf8_create_search_needle (const char *needle, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
 970 {
 971     char *fold, *result;
 972 
 973     if (needle == NULL)
 974         return NULL;
 975 
 976     if (case_sen)
 977         return g_utf8_normalize (needle, -1, G_NORMALIZE_ALL);
 978 
 979     fold = g_utf8_casefold (needle, -1);
 980     result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
 981     g_free (fold);
 982     return result;
 983 }
 984 
 985 /* --------------------------------------------------------------------------------------------- */
 986 
 987 static void
 988 str_utf8_release_search_needle (char *needle, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
 989 {
 990     (void) case_sen;
 991     g_free (needle);
 992 }
 993 
 994 /* --------------------------------------------------------------------------------------------- */
 995 
 996 static const char *
 997 str_utf8_search_first (const char *text, const char *search, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
 998 {
 999     char *deco_text;
1000     const char *match;
1001     const char *result = NULL;
1002     size_t search_len;
1003 
1004     if (case_sen)
1005         deco_text = g_utf8_normalize (text, -1, G_NORMALIZE_ALL);
1006     else
1007     {
1008         char *fold_text;
1009 
1010         fold_text = g_utf8_casefold (text, -1);
1011         deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL);
1012         g_free (fold_text);
1013     }
1014 
1015     search_len = strlen (search);
1016 
1017     match = deco_text;
1018     do
1019     {
1020         match = g_strstr_len (match, -1, search);
1021         if (match != NULL)
1022         {
1023             if ((!str_utf8_iscombiningmark (match) || (match == deco_text))
1024                 && !str_utf8_iscombiningmark (match + search_len))
1025             {
1026                 const char *m = deco_text;
1027 
1028                 result = text;
1029                 while (m < match)
1030                 {
1031                     str_utf8_cnext_noncomb_char (&m);
1032                     str_utf8_cnext_noncomb_char (&result);
1033                 }
1034             }
1035             else
1036                 str_utf8_cnext_char (&match);
1037         }
1038     }
1039     while (match != NULL && result == NULL);
1040 
1041     g_free (deco_text);
1042 
1043     return result;
1044 }
1045 
1046 /* --------------------------------------------------------------------------------------------- */
1047 
1048 static const char *
1049 str_utf8_search_last (const char *text, const char *search, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
1050 {
1051     char *deco_text;
1052     char *match;
1053     const char *result = NULL;
1054     size_t search_len;
1055 
1056     if (case_sen)
1057         deco_text = g_utf8_normalize (text, -1, G_NORMALIZE_ALL);
1058     else
1059     {
1060         char *fold_text;
1061 
1062         fold_text = g_utf8_casefold (text, -1);
1063         deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL);
1064         g_free (fold_text);
1065     }
1066 
1067     search_len = strlen (search);
1068 
1069     do
1070     {
1071         match = g_strrstr_len (deco_text, -1, search);
1072         if (match != NULL)
1073         {
1074             if ((!str_utf8_iscombiningmark (match) || (match == deco_text))
1075                 && !str_utf8_iscombiningmark (match + search_len))
1076             {
1077                 const char *m = deco_text;
1078 
1079                 result = text;
1080                 while (m < match)
1081                 {
1082                     str_utf8_cnext_noncomb_char (&m);
1083                     str_utf8_cnext_noncomb_char (&result);
1084                 }
1085             }
1086             else
1087                 match[0] = '\0';
1088         }
1089     }
1090     while (match != NULL && result == NULL);
1091 
1092     g_free (deco_text);
1093 
1094     return result;
1095 }
1096 
1097 /* --------------------------------------------------------------------------------------------- */
1098 
1099 static char *
1100 str_utf8_normalize (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
1101 {
1102     GString *fixed;
1103     char *tmp;
1104     char *result;
1105     const char *start;
1106     const char *end;
1107 
1108     /* g_utf8_normalize() is a heavyweight function, that converts UTF-8 into UCS-4,
1109      * does the normalization and then converts UCS-4 back into UTF-8.
1110      * Since file names are composed of ASCII characters in most cases, we can speed up
1111      * utf8 normalization by checking if the heavyweight Unicode normalization is actually
1112      * needed. Normalization of ASCII string is no-op.
1113      */
1114 
1115     // find out whether text is ASCII only
1116     for (end = text; *end != '\0'; end++)
1117         if ((*end & 0x80) != 0)
1118         {
1119             // found 2nd byte of utf8-encoded symbol
1120             break;
1121         }
1122 
1123     // if text is ASCII-only, return copy, normalize otherwise
1124     if (*end == '\0')
1125         return g_strndup (text, end - text);
1126 
1127     fixed = g_string_sized_new (4);
1128 
1129     start = text;
1130     while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
1131     {
1132         if (start != end)
1133         {
1134             tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL);
1135             g_string_append (fixed, tmp);
1136             g_free (tmp);
1137         }
1138         g_string_append_c (fixed, end[0]);
1139         start = end + 1;
1140     }
1141 
1142     if (start == text)
1143     {
1144         result = g_utf8_normalize (text, -1, G_NORMALIZE_ALL);
1145         g_string_free (fixed, TRUE);
1146     }
1147     else
1148     {
1149         if (start[0] != '\0' && start != end)
1150         {
1151             tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL);
1152             g_string_append (fixed, tmp);
1153             g_free (tmp);
1154         }
1155         result = g_string_free (fixed, FALSE);
1156     }
1157 
1158     return result;
1159 }
1160 
1161 /* --------------------------------------------------------------------------------------------- */
1162 
1163 static char *
1164 str_utf8_casefold_normalize (const char *text)
     /* [previous][next][first][last][top][bottom][index][help]  */
1165 {
1166     GString *fixed;
1167     char *tmp, *fold;
1168     char *result;
1169     const char *start;
1170     const char *end;
1171 
1172     fixed = g_string_sized_new (4);
1173 
1174     start = text;
1175     while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
1176     {
1177         if (start != end)
1178         {
1179             fold = g_utf8_casefold (start, end - start);
1180             tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
1181             g_string_append (fixed, tmp);
1182             g_free (tmp);
1183             g_free (fold);
1184         }
1185         g_string_append_c (fixed, end[0]);
1186         start = end + 1;
1187     }
1188 
1189     if (start == text)
1190     {
1191         fold = g_utf8_casefold (text, -1);
1192         result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
1193         g_free (fold);
1194         g_string_free (fixed, TRUE);
1195     }
1196     else
1197     {
1198         if (start[0] != '\0' && start != end)
1199         {
1200             fold = g_utf8_casefold (start, end - start);
1201             tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
1202             g_string_append (fixed, tmp);
1203             g_free (tmp);
1204             g_free (fold);
1205         }
1206         result = g_string_free (fixed, FALSE);
1207     }
1208 
1209     return result;
1210 }
1211 
1212 /* --------------------------------------------------------------------------------------------- */
1213 
/*
 * Compare T1 and T2 after normalizing both with str_utf8_normalize().
 *
 * @return result of strcmp() on the normalized strings
 */
static int
str_utf8_compare (const char *t1, const char *t2)
{
    char *const lhs = str_utf8_normalize (t1);
    char *const rhs = str_utf8_normalize (t2);
    const int order = strcmp (lhs, rhs);

    g_free (rhs);
    g_free (lhs);

    return order;
}
1230 
1231 /* --------------------------------------------------------------------------------------------- */
1232 
/*
 * Compare the common-length prefixes of T1 and T2 after normalizing both with
 * str_utf8_normalize().
 *
 * Only as many bytes as the shorter normalized string has are compared, so the result
 * is 0 when one normalized string is a prefix of the other.
 *
 * @return result of strncmp() on the normalized strings
 */
static int
str_utf8_ncompare (const char *t1, const char *t2)
{
    char *const lhs = str_utf8_normalize (t1);
    char *const rhs = str_utf8_normalize (t2);
    const size_t lhs_len = strlen (lhs);
    const size_t rhs_len = strlen (rhs);
    const size_t common = lhs_len < rhs_len ? lhs_len : rhs_len;
    const int order = strncmp (lhs, rhs, common);

    g_free (rhs);
    g_free (lhs);

    return order;
}
1252 
1253 /* --------------------------------------------------------------------------------------------- */
1254 
/*
 * Compare T1 and T2 ignoring case: both are converted with
 * str_utf8_casefold_normalize() first.
 *
 * @return result of strcmp() on the converted strings
 */
static int
str_utf8_casecmp (const char *t1, const char *t2)
{
    char *const lhs = str_utf8_casefold_normalize (t1);
    char *const rhs = str_utf8_casefold_normalize (t2);
    const int order = strcmp (lhs, rhs);

    g_free (rhs);
    g_free (lhs);

    return order;
}
1271 
1272 /* --------------------------------------------------------------------------------------------- */
1273 
/*
 * Compare the common-length prefixes of T1 and T2 ignoring case: both are converted
 * with str_utf8_casefold_normalize() first.
 *
 * Only as many bytes as the shorter converted string has are compared, so the result
 * is 0 when one converted string is a prefix of the other.
 *
 * @return result of strncmp() on the converted strings
 */
static int
str_utf8_ncasecmp (const char *t1, const char *t2)
{
    char *const lhs = str_utf8_casefold_normalize (t1);
    char *const rhs = str_utf8_casefold_normalize (t2);
    const size_t lhs_len = strlen (lhs);
    const size_t rhs_len = strlen (rhs);
    const size_t common = lhs_len < rhs_len ? lhs_len : rhs_len;
    const int order = strncmp (lhs, rhs, common);

    g_free (rhs);
    g_free (lhs);

    return order;
}
1293 
1294 /* --------------------------------------------------------------------------------------------- */
1295 
/*
 * Measure how much of PREFIX matches the beginning of TEXT.
 *
 * Both strings are normalized with str_utf8_normalize() and then compared one character
 * at a time (stepping with str_utf8_cnext_char_safe()) until a character differs or
 * either string ends.
 *
 * @return number of matching bytes, measured in the normalized PREFIX
 */
static int
str_utf8_prefix (const char *text, const char *prefix)
{
    char *const norm_text = str_utf8_normalize (text);
    char *const norm_prefix = str_utf8_normalize (prefix);
    const char *t_cur = norm_text;
    const char *p_cur = norm_prefix;
    const char *t_next = norm_text;
    const char *p_next = norm_prefix;
    int matched;

    for (; t_cur[0] != '\0' && p_cur[0] != '\0'; t_cur = t_next, p_cur = p_next)
    {
        str_utf8_cnext_char_safe (&t_next);
        str_utf8_cnext_char_safe (&p_next);

        // characters differ if their encoded lengths or their bytes differ
        if (t_next - t_cur != p_next - p_cur || strncmp (t_cur, p_cur, t_next - t_cur) != 0)
            break;
    }

    matched = p_cur - norm_prefix;

    g_free (norm_text);
    g_free (norm_prefix);

    return matched;
}
1330 
1331 /* --------------------------------------------------------------------------------------------- */
1332 
/*
 * Measure how much of PREFIX matches the beginning of TEXT, ignoring case.
 *
 * Both strings are converted with str_utf8_casefold_normalize() and then compared one
 * character at a time (stepping with str_utf8_cnext_char_safe()) until a character
 * differs or either string ends.
 *
 * @return number of matching bytes, measured in the converted PREFIX
 */
static int
str_utf8_caseprefix (const char *text, const char *prefix)
{
    char *const norm_text = str_utf8_casefold_normalize (text);
    char *const norm_prefix = str_utf8_casefold_normalize (prefix);
    const char *t_cur = norm_text;
    const char *p_cur = norm_prefix;
    const char *t_next = norm_text;
    const char *p_next = norm_prefix;
    int matched;

    for (; t_cur[0] != '\0' && p_cur[0] != '\0'; t_cur = t_next, p_cur = p_next)
    {
        str_utf8_cnext_char_safe (&t_next);
        str_utf8_cnext_char_safe (&p_next);

        // characters differ if their encoded lengths or their bytes differ
        if (t_next - t_cur != p_next - p_cur || strncmp (t_cur, p_cur, t_next - t_cur) != 0)
            break;
    }

    matched = p_cur - norm_prefix;

    g_free (norm_text);
    g_free (norm_prefix);

    return matched;
}
1367 
1368 /* --------------------------------------------------------------------------------------------- */
1369 
1370 static char *
1371 str_utf8_create_key_gen (const char *text, gboolean case_sen,
     /* [previous][next][first][last][top][bottom][index][help]  */
1372                          gchar *(*keygen) (const gchar *text, gssize size))
1373 {
1374     char *result;
1375 
1376     if (case_sen)
1377         result = str_utf8_normalize (text);
1378     else
1379     {
1380         gboolean dot;
1381         GString *fixed;
1382         const char *start, *end;
1383         char *fold, *key;
1384 
1385         dot = text[0] == '.';
1386         fixed = g_string_sized_new (16);
1387 
1388         if (!dot)
1389             start = text;
1390         else
1391         {
1392             start = text + 1;
1393             g_string_append_c (fixed, '.');
1394         }
1395 
1396         while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
1397         {
1398             if (start != end)
1399             {
1400                 fold = g_utf8_casefold (start, end - start);
1401                 key = keygen (fold, -1);
1402                 g_string_append (fixed, key);
1403                 g_free (key);
1404                 g_free (fold);
1405             }
1406             g_string_append_c (fixed, end[0]);
1407             start = end + 1;
1408         }
1409 
1410         if (start == text)
1411         {
1412             fold = g_utf8_casefold (start, -1);
1413             result = keygen (fold, -1);
1414             g_free (fold);
1415             g_string_free (fixed, TRUE);
1416         }
1417         else if (dot && (start == text + 1))
1418         {
1419             fold = g_utf8_casefold (start, -1);
1420             key = keygen (fold, -1);
1421             g_string_append (fixed, key);
1422             g_free (key);
1423             g_free (fold);
1424             result = g_string_free (fixed, FALSE);
1425         }
1426         else
1427         {
1428             if (start[0] != '\0' && start != end)
1429             {
1430                 fold = g_utf8_casefold (start, end - start);
1431                 key = keygen (fold, -1);
1432                 g_string_append (fixed, key);
1433                 g_free (key);
1434                 g_free (fold);
1435             }
1436             result = g_string_free (fixed, FALSE);
1437         }
1438     }
1439     return result;
1440 }
1441 
1442 /* --------------------------------------------------------------------------------------------- */
1443 
1444 static char *
1445 str_utf8_create_key (const char *text, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
1446 {
1447     return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key);
1448 }
1449 
1450 /* --------------------------------------------------------------------------------------------- */
1451 
#ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
/*
 * Create a sort key for the file name TEXT: str_utf8_create_key_gen() with
 * g_utf8_collate_key_for_filename() as the key generator.
 */
static char *
str_utf8_create_key_for_filename (const char *text, gboolean case_sen)
{
    char *key;

    key = str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key_for_filename);

    return key;
}
#endif
1459 
1460 /* --------------------------------------------------------------------------------------------- */
1461 
1462 static int
1463 str_utf8_key_collate (const char *t1, const char *t2, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
1464 {
1465     (void) case_sen;
1466     return strcmp (t1, t2);
1467 }
1468 
1469 /* --------------------------------------------------------------------------------------------- */
1470 
1471 static void
1472 str_utf8_release_key (char *key, gboolean case_sen)
     /* [previous][next][first][last][top][bottom][index][help]  */
1473 {
1474     (void) case_sen;
1475     g_free (key);
1476 }
1477 
1478 /* --------------------------------------------------------------------------------------------- */
1479 /*** public functions ****************************************************************************/
1480 /* --------------------------------------------------------------------------------------------- */
1481 
1482 struct str_class
1483 str_utf8_init (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
1484 {
1485     struct str_class result;
1486 
1487     result.conv_gerror_message = str_utf8_conv_gerror_message;
1488     result.vfs_convert_to = str_utf8_vfs_convert_to;
1489     result.insert_replace_char = str_utf8_insert_replace_char;
1490     result.is_valid_string = str_utf8_is_valid_string;
1491     result.is_valid_char = str_utf8_is_valid_char;
1492     result.cnext_char = str_utf8_cnext_char;
1493     result.cprev_char = str_utf8_cprev_char;
1494     result.cnext_char_safe = str_utf8_cnext_char_safe;
1495     result.cprev_char_safe = str_utf8_cprev_char_safe;
1496     result.cnext_noncomb_char = str_utf8_cnext_noncomb_char;
1497     result.cprev_noncomb_char = str_utf8_cprev_noncomb_char;
1498     result.char_isspace = str_utf8_isspace;
1499     result.char_ispunct = str_utf8_ispunct;
1500     result.char_isalnum = str_utf8_isalnum;
1501     result.char_isdigit = str_utf8_isdigit;
1502     result.char_isprint = str_utf8_isprint;
1503     result.char_iscombiningmark = str_utf8_iscombiningmark;
1504     result.char_toupper = str_utf8_toupper;
1505     result.char_tolower = str_utf8_tolower;
1506     result.length = str_utf8_length;
1507     result.length2 = str_utf8_length2;
1508     result.length_noncomb = str_utf8_length_noncomb;
1509     result.fix_string = str_utf8_fix_string;
1510     result.term_form = str_utf8_term_form;
1511     result.fit_to_term = str_utf8_fit_to_term;
1512     result.term_trim = str_utf8_term_trim;
1513     result.term_width2 = str_utf8_term_width2;
1514     result.term_width1 = str_utf8_term_width1;
1515     result.term_char_width = str_utf8_term_char_width;
1516     result.term_substring = str_utf8_term_substring;
1517     result.trunc = str_utf8_trunc;
1518     result.offset_to_pos = str_utf8_offset_to_pos;
1519     result.column_to_pos = str_utf8_column_to_pos;
1520     result.create_search_needle = str_utf8_create_search_needle;
1521     result.release_search_needle = str_utf8_release_search_needle;
1522     result.search_first = str_utf8_search_first;
1523     result.search_last = str_utf8_search_last;
1524     result.compare = str_utf8_compare;
1525     result.ncompare = str_utf8_ncompare;
1526     result.casecmp = str_utf8_casecmp;
1527     result.ncasecmp = str_utf8_ncasecmp;
1528     result.prefix = str_utf8_prefix;
1529     result.caseprefix = str_utf8_caseprefix;
1530     result.create_key = str_utf8_create_key;
1531 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1532     // case insensitive sort files in "a1 a2 a10" order
1533     result.create_key_for_filename = str_utf8_create_key_for_filename;
1534 #else
1535     // case insensitive sort files in "a1 a10 a2" order
1536     result.create_key_for_filename = str_utf8_create_key;
1537 #endif
1538     result.key_collate = str_utf8_key_collate;
1539     result.release_key = str_utf8_release_key;
1540 
1541     return result;
1542 }
1543 
1544 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */