1 #ifndef MC_STRUTIL_H 2 #define MC_STRUTIL_H 3 4 #include "lib/global.h" // include glib.h 5 6 #include <sys/types.h> 7 #include <inttypes.h> 8 #include <string.h> 9 10 /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c. 11 * There are two sort of functions: 12 * 1. functions for working with growing strings and conversion strings between 13 * different encodings. 14 * (implemented directly in strutil.c) 15 * 2. functions, that hide differences between encodings derived from ASCII. 16 * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c) 17 * documentation is made for UTF-8 version of functions. 18 */ 19 20 /* invalid strings 21 * function, that works with invalid strings are marked with "I" 22 * in documentation 23 * invalid bytes of string are handled as one byte characters with width 1, they 24 * are displayed as question marks, I-marked comparing functions try to keep 25 * the original value of these bytes. 26 */ 27 28 /* combining characters 29 * displaying: all handled as zero with characters, expect combing character 30 * at the begin of string, this character has with one (space add before), 31 * so str_term_width is not good for computing width of singles characters 32 * (never return zero, expect empty string) 33 * for compatibility are strings composed before displaying 34 * comparing: comparing decompose all string before comparing, n-compare 35 * functions do not work as is usual, because same strings do not have to be 36 * same length in UTF-8. So they return 0 if one string is prefix of the other 37 * one. 38 * str_prefix is used to determine, how many characters from one string are 39 * prefix in second string. However, str_prefix return number of characters in 40 * decompose form. (used in do_search (screen.c)) 41 */ 42 43 /*** typedefs(not structures) and defined constants **********************************************/ 44 45 #define IS_FIT(x) ((x) & 0x0010) 46 #define MAKE_FIT(x) ((x) | 0x0010) 47 #define HIDE_FIT(x) ((x) & 0x000f) 48 49 #define INVALID_CONV ((GIConv) (-1)) 50 51 /*** enums ***************************************************************************************/ 52 53 /* results of conversion function 54 */ 55 typedef enum 56 { 57 /* Success means, that the conversion has been finished successfully 58 */ 59 ESTR_SUCCESS = 0, 60 /* Problem means, that not every character was successfully converted (some are 61 * replaced with question marks). So it is impossible to convert string back. 62 */ 63 ESTR_PROBLEM = 1, 64 /* Failure means, that the conversion is not possible (example: wrong encoding 65 * of the input string) 66 */ 67 ESTR_FAILURE = 2 68 } estr_t; 69 70 /* alignment strings on terminal 71 */ 72 typedef enum 73 { 74 J_LEFT = 0x01, 75 J_RIGHT = 0x02, 76 J_CENTER = 0x03, 77 /* if there is enough space for string on terminal, 78 * string is centered otherwise is aligned to the left */ 79 J_CENTER_LEFT = 0x04, 80 // fit alignment: if string is too long, truncate with '~' 81 J_LEFT_FIT = 0x11, 82 J_RIGHT_FIT = 0x12, 83 J_CENTER_FIT = 0x13, 84 J_CENTER_LEFT_FIT = 0x14 85 } align_crt_t; 86 87 /* string-to-integer parsing results 88 */ 89 typedef enum 90 { 91 LONGINT_OK = 0, 92 93 // These two values can be ORed together, to indicate that both errors occurred. 94 LONGINT_OVERFLOW = 1, 95 LONGINT_INVALID_SUFFIX_CHAR = 2, 96 97 LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW = (LONGINT_INVALID_SUFFIX_CHAR | LONGINT_OVERFLOW), 98 LONGINT_INVALID = 4 99 } strtol_error_t; 100 101 /*** structures declarations (and typedefs of structures)*****************************************/ 102 103 /* all functions in str_class must be defined for every encoding */ 104 struct str_class 105 { 106 gchar *(*conv_gerror_message) (GError *error, const char *def_msg); 107 /*I*/ estr_t (*vfs_convert_to) (GIConv coder, const char *string, int size, GString *buffer); 108 /*I*/ void (*insert_replace_char) (GString *buffer); 109 gboolean (*is_valid_string) (const char *text); 110 /*I*/ int (*is_valid_char) (const char *ch, size_t size); 111 /*I*/ void (*cnext_char) (const char **text); 112 void (*cprev_char) (const char **text); 113 void (*cnext_char_safe) (const char **text); 114 /*I*/ void (*cprev_char_safe) (const char **text); 115 /*I*/ int (*cnext_noncomb_char) (const char **text); 116 /*I*/ int (*cprev_noncomb_char) (const char **text, const char *begin); 117 /*I*/ gboolean (*char_isspace) (const char *ch); 118 /*I*/ gboolean (*char_ispunct) (const char *ch); 119 /*I*/ gboolean (*char_isalnum) (const char *ch); 120 /*I*/ gboolean (*char_isdigit) (const char *ch); 121 /*I*/ gboolean (*char_isprint) (const char *ch); 122 /*I*/ gboolean (*char_iscombiningmark) (const char *ch); 123 /*I*/ int (*length) (const char *text); 124 /*I*/ int (*length2) (const char *text, int size); 125 /*I*/ int (*length_noncomb) (const char *text); 126 /*I*/ gboolean (*char_toupper) (const char *ch, char **out, size_t *remain); 127 gboolean (*char_tolower) (const char *ch, char **out, size_t *remain); 128 void (*fix_string) (char *text); 129 /*I*/ const char *(*term_form) (const char *text); 130 /*I*/ const char *(*fit_to_term) (const char *text, int width, align_crt_t just_mode); 131 /*I*/ const char *(*term_trim) (const char *text, int width); 132 /*I*/ const char *(*term_substring) (const char *text, int start, int width); 133 /*I*/ int (*term_width1) (const char *text); 134 /*I*/ int (*term_width2) (const char *text, size_t length); 135 /*I*/ int (*term_char_width) (const char *length); 136 /*I*/ const char *(*trunc) (const char *length, int width); 137 /*I*/ int (*offset_to_pos) (const char *text, size_t length); 138 /*I*/ int (*column_to_pos) (const char *text, size_t pos); 139 /*I*/ char *(*create_search_needle) (const char *needle, gboolean case_sen); 140 void (*release_search_needle) (char *needle, gboolean case_sen); 141 const char *(*search_first) (const char *text, const char *needle, gboolean case_sen); 142 const char *(*search_last) (const char *text, const char *needle, gboolean case_sen); 143 int (*compare) (const char *t1, const char *t2); 144 /*I*/ int (*ncompare) (const char *t1, const char *t2); 145 /*I*/ int (*casecmp) (const char *t1, const char *t2); 146 /*I*/ int (*ncasecmp) (const char *t1, const char *t2); 147 /*I*/ int (*prefix) (const char *text, const char *prefix); 148 /*I*/ int (*caseprefix) (const char *text, const char *prefix); 149 /*I*/ char *(*create_key) (const char *text, gboolean case_sen); 150 /*I*/ char *(*create_key_for_filename) (const char *text, gboolean case_sen); 151 /*I*/ int (*key_collate) (const char *t1, const char *t2, gboolean case_sen); 152 /*I*/ void (*release_key) (char *key, gboolean case_sen); 153 }; 154 155 /*** global variables defined in .c file *********************************************************/ 156 157 /* standard converters */ 158 extern GIConv str_cnv_to_term; 159 extern GIConv str_cnv_from_term; 160 /* from terminal encoding to terminal encoding */ 161 extern GIConv str_cnv_not_convert; 162 163 /*** declarations of public functions ************************************************************/ 164 165 struct str_class str_utf8_init (void); 166 struct str_class str_8bit_init (void); 167 struct str_class str_ascii_init (void); 168 169 /* create converter from "from_enc" to terminal encoding 170 * if "from_enc" is not supported return INVALID_CONV 171 */ 172 GIConv str_crt_conv_from (const char *from_enc); 173 174 /* create converter from terminal encoding to "to_enc" 175 * if "to_enc" is not supported return INVALID_CONV 176 */ 177 GIConv str_crt_conv_to (const char *to_enc); 178 179 /* close converter, do not close str_cnv_to_term, str_cnv_from_term, 180 * str_cnv_not_convert 181 */ 182 void str_close_conv (GIConv conv); 183 184 /* return on of not used buffers (.used == 0) or create new 185 * returned buffer has set .used to 1 186 */ 187 188 /* convert string using coder, result of conversion is appended at end of buffer 189 * return ESTR_SUCCESS if there was no problem. 190 * otherwise return ESTR_PROBLEM or ESTR_FAILURE 191 */ 192 estr_t str_convert (GIConv coder, const char *string, GString *buffer); 193 estr_t str_nconvert (GIConv coder, const char *string, int size, GString *buffer); 194 195 /* convert GError message (which in UTF-8) to terminal charset 196 * def_char is used if result of error->str conversion if ESTR_FAILURE 197 * return new allocated null-terminated string, which is need to be freed 198 * I 199 */ 200 gchar *str_conv_gerror_message (GError *error, const char *def_msg); 201 202 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert 203 * result to original string. (so no replace with questionmark) 204 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied, 205 * so is possible to show file, that is not valid in terminal encoding 206 */ 207 estr_t str_vfs_convert_from (GIConv coder, const char *string, GString *buffer); 208 209 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied, 210 * does replace with question mark 211 * I 212 */ 213 estr_t str_vfs_convert_to (GIConv coder, const char *string, int size, GString *buffer); 214 215 /* printf function for str_buffer, append result of printf at the end of buffer 216 */ 217 void str_printf (GString *buffer, const char *format, ...) G_GNUC_PRINTF (2, 3); /**/ 218 219 /* add standard replacement character in terminal encoding 220 */ 221 void str_insert_replace_char (GString *buffer); 222 223 /* init strings and set terminal encoding, 224 * if is termenc NULL, detect terminal encoding 225 * create all str_cnv_* and set functions for terminal encoding 226 */ 227 void str_init_strings (const char *termenc); 228 229 /* free all str_buffer and all str_cnv_* 230 */ 231 void str_uninit_strings (void); 232 233 /* try convert characters in ch to output using conv 234 * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII 235 * compatible encoding, for other must be set) 236 * return ESTR_SUCCESS if conversion was successfully, 237 * ESTR_PROBLEM if ch contains only part of characters, 238 * ESTR_FAILURE if conversion is not possible 239 */ 240 estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size, char *output, 241 size_t out_size); 242 243 /* test, if text is valid in terminal encoding 244 * I 245 */ 246 gboolean str_is_valid_string (const char *text); 247 248 /* test, if first char of ch is valid 249 * size, how many bytes characters occupied, could be (size_t)(-1) 250 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of 251 * multibyte character 252 * I 253 */ 254 int str_is_valid_char (const char *ch, size_t size); 255 256 /* return next characters after text, do not call on the end of string 257 */ 258 char *str_get_next_char (char *text); 259 const char *str_cget_next_char (const char *text); 260 261 /* return previous characters before text, do not call on the start of strings 262 */ 263 char *str_get_prev_char (char *text); 264 const char *str_cget_prev_char (const char *text); 265 266 /* set text to next characters, do not call on the end of string 267 */ 268 void str_next_char (char **text); 269 void str_cnext_char (const char **text); 270 271 /* set text to previous characters, do not call on the start of strings 272 */ 273 void str_prev_char (char **text); 274 void str_cprev_char (const char **text); 275 276 /* return next characters after text, do not call on the end of string 277 * works with invalid string 278 * I 279 */ 280 char *str_get_next_char_safe (char *text); 281 const char *str_cget_next_char_safe (const char *text); 282 283 /* return previous characters before text, do not call on the start of strings 284 * works with invalid string 285 * I 286 */ 287 char *str_get_prev_char_safe (char *text); 288 const char *str_cget_prev_char_safe (const char *text); 289 290 /* set text to next characters, do not call on the end of string 291 * works with invalid string 292 * I 293 */ 294 void str_next_char_safe (char **text); 295 void str_cnext_char_safe (const char **text); 296 297 /* set text to previous characters, do not call on the start of strings 298 * works with invalid string 299 * I 300 */ 301 void str_prev_char_safe (char **text); 302 void str_cprev_char_safe (const char **text); 303 304 /* set text to next noncombining characters, check the end of text 305 * return how many characters was skipped 306 * works with invalid string 307 * I 308 */ 309 int str_next_noncomb_char (char **text); 310 int str_cnext_noncomb_char (const char **text); 311 312 /* set text to previous noncombining characters, search stop at begin 313 * return how many characters was skipped 314 * works with invalid string 315 * I 316 */ 317 int str_prev_noncomb_char (char **text, const char *begin); 318 int str_cprev_noncomb_char (const char **text, const char *begin); 319 320 /* if first characters in ch is space, tabulator or new lines 321 * I 322 */ 323 gboolean str_isspace (const char *ch); 324 325 /* if first characters in ch is punctuation or symbol 326 * I 327 */ 328 gboolean str_ispunct (const char *ch); 329 330 /* if first characters in ch is alphanum 331 * I 332 */ 333 gboolean str_isalnum (const char *ch); 334 335 /* if first characters in ch is digit 336 * I 337 */ 338 gboolean str_isdigit (const char *ch); 339 340 /* if first characters in ch is printable 341 * I 342 */ 343 gboolean str_isprint (const char *ch); 344 345 /* if first characters in ch is a combining mark (only in utf-8) 346 * combining makrs are assumed to be zero width 347 * I 348 */ 349 gboolean str_iscombiningmark (const char *ch); 350 351 /* write lower from of first characters in ch into out 352 * decrease remain by size of returned characters 353 * if out is not big enough, do nothing 354 */ 355 gboolean str_toupper (const char *ch, char **out, size_t *remain); 356 357 /* write upper from of first characters in ch into out 358 * decrease remain by size of returned characters 359 * if out is not big enough, do nothing 360 */ 361 gboolean str_tolower (const char *ch, char **out, size_t *remain); 362 363 /* return length of text in characters 364 * I 365 */ 366 int str_length (const char *text); 367 368 /* return length of text in characters, limit to size 369 * I 370 */ 371 int str_length2 (const char *text, int size); 372 373 /* return length of one char 374 * I 375 */ 376 int str_length_char (const char *text); 377 378 /* return length of text in characters, count only noncombining characters 379 * I 380 */ 381 int str_length_noncomb (const char *text); 382 383 /* replace all invalid characters in text with questionmark 384 * after return, text is valid string in terminal encoding 385 * I 386 */ 387 void str_fix_string (char *text); 388 389 /* replace all invalid characters in text with questionmark 390 * replace all unprintable characters with '.' 391 * return static allocated string, "text" is not changed 392 * returned string do not need to be freed 393 * I 394 */ 395 const char *str_term_form (const char *text); 396 397 /* like str_term_form, but text can be alignment to width 398 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...) 399 * result is completed with spaces to width 400 * I 401 */ 402 const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode); 403 404 /* like str_term_form, but when text is wider than width, three dots are 405 * inserted at begin and result is completed with suffix of text 406 * no additional spaces are inserted 407 * I 408 */ 409 const char *str_term_trim (const char *text, int width); 410 411 /* like str_term_form, but return only specified substring 412 * start - column (position) on terminal, where substring begin 413 * result is completed with spaces to width 414 * I 415 */ 416 const char *str_term_substring (const char *text, int start, int width); 417 418 /* return width, that will be text occupied on terminal 419 * I 420 */ 421 int str_term_width1 (const char *text); 422 423 /* return width, that will be text occupied on terminal 424 * text is limited by length in characters 425 * I 426 */ 427 int str_term_width2 (const char *text, size_t length); 428 429 /* return width, that will be character occupied on terminal 430 * combining characters are always zero width 431 * I 432 */ 433 int str_term_char_width (const char *text); 434 435 /* convert position in characters to position in bytes 436 * I 437 */ 438 int str_offset_to_pos (const char *text, size_t length); 439 440 /* convert position on terminal to position in characters 441 * I 442 */ 443 int str_column_to_pos (const char *text, size_t pos); 444 445 /* like str_fit_to_term width just_mode = J_LEFT_FIT, 446 * but do not insert additional spaces 447 * I 448 */ 449 const char *str_trunc (const char *text, int width); 450 451 /* create needle, that will be searched in str_search_fist/last, 452 * so needle can be reused 453 * in UTF-8 return normalized form of needle 454 */ 455 char *str_create_search_needle (const char *needle, gboolean case_sen); 456 457 /* free needle returned by str_create_search_needle 458 */ 459 void str_release_search_needle (char *needle, gboolean case_sen); 460 461 /* search for first occurrence of search in text 462 */ 463 const char *str_search_first (const char *text, const char *needle, gboolean case_sen); 464 465 /* search for last occurrence of search in text 466 */ 467 const char *str_search_last (const char *text, const char *needle, gboolean case_sen); 468 469 /* case sensitive compare two strings 470 * I 471 */ 472 int str_compare (const char *t1, const char *t2); 473 474 /* case sensitive compare two strings 475 * if one string is prefix of the other string, return 0 476 * I 477 */ 478 int str_ncompare (const char *t1, const char *t2); 479 480 /* case insensitive compare two strings 481 * I 482 */ 483 int str_casecmp (const char *t1, const char *t2); 484 485 /* case insensitive compare two strings 486 * if one string is prefix of the other string, return 0 487 * I 488 */ 489 int str_ncasecmp (const char *t1, const char *t2); 490 491 /* return, how many bytes are are same from start in text and prefix 492 * both strings are decomposed before comparing and return value is counted 493 * in decomposed form, too. calling with prefix, prefix, you get size in bytes 494 * of prefix in decomposed form, 495 * I 496 */ 497 int str_prefix (const char *text, const char *prefix); 498 499 /* case insensitive version of str_prefix 500 * I 501 */ 502 int str_caseprefix (const char *text, const char *prefix); 503 504 /* create a key that is used by str_key_collate 505 * I 506 */ 507 char *str_create_key (const char *text, gboolean case_sen); 508 509 /* create a key that is used by str_key_collate 510 * should aware dot '.' in text 511 * I 512 */ 513 char *str_create_key_for_filename (const char *text, gboolean case_sen); 514 515 /* compare two string using LC_COLLATE, if is possible 516 * if case_sen is set, comparing is case sensitive, 517 * case_sen must be same for str_create_key, str_key_collate and str_release_key 518 * I 519 */ 520 int str_key_collate (const char *t1, const char *t2, gboolean case_sen); 521 522 /* release_key created by str_create_key, only right way to release key 523 * I 524 */ 525 void str_release_key (char *key, gboolean case_sen); 526 527 /* return TRUE if codeset_name is utf8 or utf-8 528 * I 529 */ 530 gboolean str_isutf8 (const char *codeset_name); 531 532 const char *str_detect_termencoding (void); 533 534 int str_verscmp (const char *s1, const char *s2); 535 536 /* Like filevercmp, except compare the byte arrays a (of length alen) and b (of length blen) 537 so that a and b can contain '\0', which sorts just before '\1'. But if alen is -1 treat 538 a as a string terminated by '\0', and similarly for blen. 539 */ 540 int filenvercmp (char const *a, ssize_t alen, char const *b, ssize_t blen); 541 542 /* return how many lines and columns will text occupy on terminal 543 */ 544 void str_msg_term_size (const char *text, int *lines, int *columns); 545 546 /** 547 * skip first needle's in haystack 548 * 549 * @param haystack pointer to string 550 * @param needle pointer to string 551 * @param skip_count skip first bytes 552 * 553 * @return pointer to skip_count+1 needle (or NULL if not found). 554 */ 555 556 char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count); 557 558 char *str_replace_all (const char *haystack, const char *needle, const char *replacement); 559 560 GPtrArray *str_tokenize (const char *string); 561 562 strtol_error_t xstrtoumax (const char *nptr, char **endptr, int base, uintmax_t *val, 563 const char *valid_suffixes); 564 uintmax_t parse_integer (const char *str, gboolean *invalid); 565 566 char *str_escape (const char *src, gsize src_len, const char *escaped_chars, 567 gboolean escape_non_printable); 568 char *str_unescape (const char *src, gsize src_len, const char *unescaped_chars, 569 gboolean unescape_non_printable); 570 char *str_shell_unescape (const char *text); 571 char *str_shell_escape (const char *text); 572 573 char *str_glob_escape (const char *text); 574 char *str_glob_unescape (const char *text); 575 576 char *str_regex_escape (const char *text); 577 char *str_regex_unescape (const char *text); 578 579 gboolean str_is_char_escaped (const char *start, const char *current); 580 581 void str_rstrip_eol (char *s); 582 583 /* --------------------------------------------------------------------------------------------- */ 584 /*** inline functions ****************************************************************************/ 585 /* --------------------------------------------------------------------------------------------- */ 586 587 static inline void 588 str_replace (char *s, char from, char to) 589 { 590 for (; *s != '\0'; s++) 591 { 592 if (*s == from) 593 *s = to; 594 } 595 } 596 597 /* --------------------------------------------------------------------------------------------- */ 598 /* 599 * strcpy is unsafe on overlapping memory areas, so define memmove-alike 600 * string function. 601 * Have sense only when: 602 * * dest <= src 603 * AND 604 * * dest and str are pointers to one object (as Roland Illig pointed). 605 * 606 * We can't use str*cpy funs here. 607 * 608 * @param dest pointer to string 609 * @param src pointer to string 610 * 611 * @return newly allocated string 612 * 613 */ 614 615 static inline char * 616 str_move (char *dest, const char *src) /*
*/ 617 { 618 size_t n; 619 620 g_assert (dest <= src); 621 622 n = strlen (src) + 1; // + '\0' 623 624 return (char *) memmove (dest, src, n); 625 } 626 627 /* --------------------------------------------------------------------------------------------- */ 628 /* Compare version strings: 629 630 Compare strings a and b as file names containing version numbers, and return an integer 631 that is negative, zero, or positive depending on whether a compares less than, equal to, 632 or greater than b. 633 634 Use the following version sort algorithm: 635 636 1. Compare the strings' maximal-length non-digit prefixes lexically. 637 If there is a difference return that difference. 638 Otherwise discard the prefixes and continue with the next step. 639 640 2. Compare the strings' maximal-length digit prefixes, using numeric comparison 641 of the numbers represented by each prefix. (Treat an empty prefix as zero; this can 642 happen only at string end.) 643 If there is a difference, return that difference. 644 Otherwise discard the prefixes and continue with the next step. 645 646 3. If both strings are empty, return 0. Otherwise continue with step 1. 647 648 In version sort, lexical comparison is left to right, byte by byte, using the byte's numeric 649 value (0-255), except that: 650 651 1. ASCII letters sort before other bytes. 652 2. A tilde sorts before anything, even an empty string. 653 654 In addition to the version sort rules, the following strings have special priority and sort 655 before all other strings (listed in order): 656 657 1. The empty string. 658 2. ".". 659 3. "..". 660 4. Strings starting with "." sort before other strings. 661 662 Before comparing two strings where both begin with non-".", or where both begin with "." 663 but neither is "." or "..", suffixes matching the C-locale extended regular expression 664 (\.[A-Za-z~][A-Za-z0-9~]*)*$ are removed and the strings compared without them, using version 665 sort without special priority; if they do not compare equal, this comparison result is used and 666 the suffixes are effectively ignored. Otherwise, the entire strings are compared using version 667 sort. When removing a suffix from a nonempty string, remove the maximal-length suffix such that 668 the remaining string is nonempty. 669 */ 670 671 static inline int 672 filevercmp (const char *s1, const char *s2) /*
*/ 673 { 674 return filenvercmp (s1, -1, s2, -1); 675 } 676 677 /* --------------------------------------------------------------------------------------------- */ 678 679 #endif