1 #ifndef MC_STRUTIL_H 2 #define MC_STRUTIL_H 3 4 #include "lib/global.h" /* include glib.h */ 5 6 #include <sys/types.h> 7 #include <inttypes.h> 8 #include <string.h> 9 10 /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c. 11 * There are two sort of functions: 12 * 1. functions for working with growing strings and conversion strings between 13 * different encodings. 14 * (implemented directly in strutil.c) 15 * 2. functions, that hide differences between encodings derived from ASCII. 16 * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c) 17 * documentation is made for UTF-8 version of functions. 18 */ 19 20 /* invalid strings 21 * function, that works with invalid strings are marked with "I" 22 * in documentation 23 * invalid bytes of string are handled as one byte characters with width 1, they 24 * are displayed as question marks, I-marked comparing functions try to keep 25 * the original value of these bytes. 26 */ 27 28 /* combining characters 29 * displaying: all handled as zero with characters, expect combing character 30 * at the begin of string, this character has with one (space add before), 31 * so str_term_width is not good for computing width of singles characters 32 * (never return zero, expect empty string) 33 * for compatibility are strings composed before displaying 34 * comparing: comparing decompose all string before comparing, n-compare 35 * functions do not work as is usual, because same strings do not have to be 36 * same length in UTF-8. So they return 0 if one string is prefix of the other 37 * one. 38 * str_prefix is used to determine, how many characters from one string are 39 * prefix in second string. However, str_prefix return number of characters in 40 * decompose form. (used in do_search (screen.c)) 41 */ 42 43 /*** typedefs(not structures) and defined constants **********************************************/ 44 45 #define IS_FIT(x) ((x) & 0x0010) 46 #define MAKE_FIT(x) ((x) | 0x0010) 47 #define HIDE_FIT(x) ((x) & 0x000f) 48 49 #define INVALID_CONV ((GIConv) (-1)) 50 51 /*** enums ***************************************************************************************/ 52 53 /* results of conversion function 54 */ 55 typedef enum 56 { 57 /* Success means, that the conversion has been finished successfully 58 */ 59 ESTR_SUCCESS = 0, 60 /* Problem means, that not every character was successfully converted (some are 61 * replaced with question marks). So it is impossible to convert string back. 62 */ 63 ESTR_PROBLEM = 1, 64 /* Failure means, that the conversion is not possible (example: wrong encoding 65 * of the input string) 66 */ 67 ESTR_FAILURE = 2 68 } estr_t; 69 70 /* alignment strings on terminal 71 */ 72 typedef enum 73 { 74 J_LEFT = 0x01, 75 J_RIGHT = 0x02, 76 J_CENTER = 0x03, 77 /* if there is enough space for string on terminal, 78 * string is centered otherwise is aligned to the left */ 79 J_CENTER_LEFT = 0x04, 80 /* fit alignment: if string is too long, truncate with '~' */ 81 J_LEFT_FIT = 0x11, 82 J_RIGHT_FIT = 0x12, 83 J_CENTER_FIT = 0x13, 84 J_CENTER_LEFT_FIT = 0x14 85 } align_crt_t; 86 87 /* string-to-integer parsing results 88 */ 89 typedef enum 90 { 91 LONGINT_OK = 0, 92 93 /* These two values can be ORed together, to indicate that both errors occurred. */ 94 LONGINT_OVERFLOW = 1, 95 LONGINT_INVALID_SUFFIX_CHAR = 2, 96 97 LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW = (LONGINT_INVALID_SUFFIX_CHAR | LONGINT_OVERFLOW), 98 LONGINT_INVALID = 4 99 } strtol_error_t; 100 101 /*** structures declarations (and typedefs of structures)*****************************************/ 102 103 /* all functions in str_class must be defined for every encoding */ 104 struct str_class 105 { 106 /* *INDENT-OFF* */ 107 gchar *(*conv_gerror_message) (GError * error, const char *def_msg); 108 /*I*/ estr_t (*vfs_convert_to) (GIConv coder, const char *string, int size, GString * buffer); 109 /*I*/ void (*insert_replace_char) (GString * buffer); 110 gboolean (*is_valid_string) (const char *text); 111 /*I*/ int (*is_valid_char) (const char *ch, size_t size); 112 /*I*/ void (*cnext_char) (const char **text); 113 void (*cprev_char) (const char **text); 114 void (*cnext_char_safe) (const char **text); 115 /*I*/ void (*cprev_char_safe) (const char **text); 116 /*I*/ int (*cnext_noncomb_char) (const char **text); 117 /*I*/ int (*cprev_noncomb_char) (const char **text, const char *begin); 118 /*I*/ gboolean (*char_isspace) (const char *ch); 119 /*I*/ gboolean (*char_ispunct) (const char *ch); 120 /*I*/ gboolean (*char_isalnum) (const char *ch); 121 /*I*/ gboolean (*char_isdigit) (const char *ch); 122 /*I*/ gboolean (*char_isprint) (const char *ch); 123 /*I*/ gboolean (*char_iscombiningmark) (const char *ch); 124 /*I*/ int (*length) (const char *text); 125 /*I*/ int (*length2) (const char *text, int size); 126 /*I*/ int (*length_noncomb) (const char *text); 127 /*I*/ gboolean (*char_toupper) (const char *ch, char **out, size_t * remain); 128 gboolean (*char_tolower) (const char *ch, char **out, size_t * remain); 129 void (*fix_string) (char *text); 130 /*I*/ const char *(*term_form) (const char *text); 131 /*I*/ const char *(*fit_to_term) (const char *text, int width, align_crt_t just_mode); 132 /*I*/ const char *(*term_trim) (const char *text, int width); 133 /*I*/ const char *(*term_substring) (const char *text, int start, int width); 134 /*I*/ int (*term_width1) (const char *text); 135 /*I*/ int (*term_width2) (const char *text, size_t length); 136 /*I*/ int (*term_char_width) (const char *length); 137 /*I*/ const char *(*trunc) (const char *length, int width); 138 /*I*/ int (*offset_to_pos) (const char *text, size_t length); 139 /*I*/ int (*column_to_pos) (const char *text, size_t pos); 140 /*I*/ char *(*create_search_needle) (const char *needle, gboolean case_sen); 141 void (*release_search_needle) (char *needle, gboolean case_sen); 142 const char *(*search_first) (const char *text, const char *needle, gboolean case_sen); 143 const char *(*search_last) (const char *text, const char *needle, gboolean case_sen); 144 int (*compare) (const char *t1, const char *t2); 145 /*I*/ int (*ncompare) (const char *t1, const char *t2); 146 /*I*/ int (*casecmp) (const char *t1, const char *t2); 147 /*I*/ int (*ncasecmp) (const char *t1, const char *t2); 148 /*I*/ int (*prefix) (const char *text, const char *prefix); 149 /*I*/ int (*caseprefix) (const char *text, const char *prefix); 150 /*I*/ char *(*create_key) (const char *text, gboolean case_sen); 151 /*I*/ char *(*create_key_for_filename) (const char *text, gboolean case_sen); 152 /*I*/ int (*key_collate) (const char *t1, const char *t2, gboolean case_sen); 153 /*I*/ void (*release_key) (char *key, gboolean case_sen); 154 /* *INDENT-ON* */ 155 }; 156 157 /*** global variables defined in .c file *********************************************************/ 158 159 /* standard converters */ 160 extern GIConv str_cnv_to_term; 161 extern GIConv str_cnv_from_term; 162 /* from terminal encoding to terminal encoding */ 163 extern GIConv str_cnv_not_convert; 164 165 /*** declarations of public functions ************************************************************/ 166 167 struct str_class str_utf8_init (void); 168 struct str_class str_8bit_init (void); 169 struct str_class str_ascii_init (void); 170 171 /* create converter from "from_enc" to terminal encoding 172 * if "from_enc" is not supported return INVALID_CONV 173 */ 174 GIConv str_crt_conv_from (const char *from_enc); 175 176 /* create converter from terminal encoding to "to_enc" 177 * if "to_enc" is not supported return INVALID_CONV 178 */ 179 GIConv str_crt_conv_to (const char *to_enc); 180 181 /* close converter, do not close str_cnv_to_term, str_cnv_from_term, 182 * str_cnv_not_convert 183 */ 184 void str_close_conv (GIConv conv); 185 186 /* return on of not used buffers (.used == 0) or create new 187 * returned buffer has set .used to 1 188 */ 189 190 /* convert string using coder, result of conversion is appended at end of buffer 191 * return ESTR_SUCCESS if there was no problem. 192 * otherwise return ESTR_PROBLEM or ESTR_FAILURE 193 */ 194 estr_t str_convert (GIConv coder, const char *string, GString * buffer); 195 estr_t str_nconvert (GIConv coder, const char *string, int size, GString * buffer); 196 197 /* convert GError message (which in UTF-8) to terminal charset 198 * def_char is used if result of error->str conversion if ESTR_FAILURE 199 * return new allocated null-terminated string, which is need to be freed 200 * I 201 */ 202 gchar *str_conv_gerror_message (GError * error, const char *def_msg); 203 204 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert 205 * result to original string. (so no replace with questionmark) 206 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied, 207 * so is possible to show file, that is not valid in terminal encoding 208 */ 209 estr_t str_vfs_convert_from (GIConv coder, const char *string, GString * buffer); 210 211 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied, 212 * does replace with question mark 213 * I 214 */ 215 estr_t str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer); 216 217 /* printf function for str_buffer, append result of printf at the end of buffer 218 */ 219 /* *INDENT-OFF* */ 220 void str_printf (GString * buffer, const char *format, ...) G_GNUC_PRINTF (2, 3); /* */ 221 /* *INDENT-ON* */ 222 223 /* add standard replacement character in terminal encoding 224 */ 225 void str_insert_replace_char (GString * buffer); 226 227 /* init strings and set terminal encoding, 228 * if is termenc NULL, detect terminal encoding 229 * create all str_cnv_* and set functions for terminal encoding 230 */ 231 void str_init_strings (const char *termenc); 232 233 /* free all str_buffer and all str_cnv_* 234 */ 235 void str_uninit_strings (void); 236 237 /* try convert characters in ch to output using conv 238 * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII 239 * compatible encoding, for other must be set) 240 * return ESTR_SUCCESS if conversion was successfully, 241 * ESTR_PROBLEM if ch contains only part of characters, 242 * ESTR_FAILURE if conversion is not possible 243 */ 244 estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size, 245 char *output, size_t out_size); 246 247 /* test, if text is valid in terminal encoding 248 * I 249 */ 250 gboolean str_is_valid_string (const char *text); 251 252 /* test, if first char of ch is valid 253 * size, how many bytes characters occupied, could be (size_t)(-1) 254 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of 255 * multibyte character 256 * I 257 */ 258 int str_is_valid_char (const char *ch, size_t size); 259 260 /* return next characters after text, do not call on the end of string 261 */ 262 char *str_get_next_char (char *text); 263 const char *str_cget_next_char (const char *text); 264 265 /* return previous characters before text, do not call on the start of strings 266 */ 267 char *str_get_prev_char (char *text); 268 const char *str_cget_prev_char (const char *text); 269 270 /* set text to next characters, do not call on the end of string 271 */ 272 void str_next_char (char **text); 273 void str_cnext_char (const char **text); 274 275 /* set text to previous characters, do not call on the start of strings 276 */ 277 void str_prev_char (char **text); 278 void str_cprev_char (const char **text); 279 280 /* return next characters after text, do not call on the end of string 281 * works with invalid string 282 * I 283 */ 284 char *str_get_next_char_safe (char *text); 285 const char *str_cget_next_char_safe (const char *text); 286 287 /* return previous characters before text, do not call on the start of strings 288 * works with invalid string 289 * I 290 */ 291 char *str_get_prev_char_safe (char *text); 292 const char *str_cget_prev_char_safe (const char *text); 293 294 /* set text to next characters, do not call on the end of string 295 * works with invalid string 296 * I 297 */ 298 void str_next_char_safe (char **text); 299 void str_cnext_char_safe (const char **text); 300 301 /* set text to previous characters, do not call on the start of strings 302 * works with invalid string 303 * I 304 */ 305 void str_prev_char_safe (char **text); 306 void str_cprev_char_safe (const char **text); 307 308 /* set text to next noncombining characters, check the end of text 309 * return how many characters was skipped 310 * works with invalid string 311 * I 312 */ 313 int str_next_noncomb_char (char **text); 314 int str_cnext_noncomb_char (const char **text); 315 316 /* set text to previous noncombining characters, search stop at begin 317 * return how many characters was skipped 318 * works with invalid string 319 * I 320 */ 321 int str_prev_noncomb_char (char **text, const char *begin); 322 int str_cprev_noncomb_char (const char **text, const char *begin); 323 324 /* if first characters in ch is space, tabulator or new lines 325 * I 326 */ 327 gboolean str_isspace (const char *ch); 328 329 /* if first characters in ch is punctuation or symbol 330 * I 331 */ 332 gboolean str_ispunct (const char *ch); 333 334 /* if first characters in ch is alphanum 335 * I 336 */ 337 gboolean str_isalnum (const char *ch); 338 339 /* if first characters in ch is digit 340 * I 341 */ 342 gboolean str_isdigit (const char *ch); 343 344 /* if first characters in ch is printable 345 * I 346 */ 347 gboolean str_isprint (const char *ch); 348 349 /* if first characters in ch is a combining mark (only in utf-8) 350 * combining makrs are assumed to be zero width 351 * I 352 */ 353 gboolean str_iscombiningmark (const char *ch); 354 355 /* write lower from of first characters in ch into out 356 * decrease remain by size of returned characters 357 * if out is not big enough, do nothing 358 */ 359 gboolean str_toupper (const char *ch, char **out, size_t *remain); 360 361 /* write upper from of first characters in ch into out 362 * decrease remain by size of returned characters 363 * if out is not big enough, do nothing 364 */ 365 gboolean str_tolower (const char *ch, char **out, size_t *remain); 366 367 /* return length of text in characters 368 * I 369 */ 370 int str_length (const char *text); 371 372 /* return length of text in characters, limit to size 373 * I 374 */ 375 int str_length2 (const char *text, int size); 376 377 /* return length of one char 378 * I 379 */ 380 int str_length_char (const char *text); 381 382 /* return length of text in characters, count only noncombining characters 383 * I 384 */ 385 int str_length_noncomb (const char *text); 386 387 /* replace all invalid characters in text with questionmark 388 * after return, text is valid string in terminal encoding 389 * I 390 */ 391 void str_fix_string (char *text); 392 393 /* replace all invalid characters in text with questionmark 394 * replace all unprintable characters with '.' 395 * return static allocated string, "text" is not changed 396 * returned string do not need to be freed 397 * I 398 */ 399 const char *str_term_form (const char *text); 400 401 /* like str_term_form, but text can be alignment to width 402 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...) 403 * result is completed with spaces to width 404 * I 405 */ 406 const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode); 407 408 /* like str_term_form, but when text is wider than width, three dots are 409 * inserted at begin and result is completed with suffix of text 410 * no additional spaces are inserted 411 * I 412 */ 413 const char *str_term_trim (const char *text, int width); 414 415 416 /* like str_term_form, but return only specified substring 417 * start - column (position) on terminal, where substring begin 418 * result is completed with spaces to width 419 * I 420 */ 421 const char *str_term_substring (const char *text, int start, int width); 422 423 /* return width, that will be text occupied on terminal 424 * I 425 */ 426 int str_term_width1 (const char *text); 427 428 /* return width, that will be text occupied on terminal 429 * text is limited by length in characters 430 * I 431 */ 432 int str_term_width2 (const char *text, size_t length); 433 434 /* return width, that will be character occupied on terminal 435 * combining characters are always zero width 436 * I 437 */ 438 int str_term_char_width (const char *text); 439 440 /* convert position in characters to position in bytes 441 * I 442 */ 443 int str_offset_to_pos (const char *text, size_t length); 444 445 /* convert position on terminal to position in characters 446 * I 447 */ 448 int str_column_to_pos (const char *text, size_t pos); 449 450 /* like str_fit_to_term width just_mode = J_LEFT_FIT, 451 * but do not insert additional spaces 452 * I 453 */ 454 const char *str_trunc (const char *text, int width); 455 456 /* create needle, that will be searched in str_search_fist/last, 457 * so needle can be reused 458 * in UTF-8 return normalized form of needle 459 */ 460 char *str_create_search_needle (const char *needle, gboolean case_sen); 461 462 /* free needle returned by str_create_search_needle 463 */ 464 void str_release_search_needle (char *needle, gboolean case_sen); 465 466 /* search for first occurrence of search in text 467 */ 468 const char *str_search_first (const char *text, const char *needle, gboolean case_sen); 469 470 /* search for last occurrence of search in text 471 */ 472 const char *str_search_last (const char *text, const char *needle, gboolean case_sen); 473 474 /* case sensitive compare two strings 475 * I 476 */ 477 int str_compare (const char *t1, const char *t2); 478 479 /* case sensitive compare two strings 480 * if one string is prefix of the other string, return 0 481 * I 482 */ 483 int str_ncompare (const char *t1, const char *t2); 484 485 /* case insensitive compare two strings 486 * I 487 */ 488 int str_casecmp (const char *t1, const char *t2); 489 490 /* case insensitive compare two strings 491 * if one string is prefix of the other string, return 0 492 * I 493 */ 494 int str_ncasecmp (const char *t1, const char *t2); 495 496 /* return, how many bytes are are same from start in text and prefix 497 * both strings are decomposed before comparing and return value is counted 498 * in decomposed form, too. calling with prefix, prefix, you get size in bytes 499 * of prefix in decomposed form, 500 * I 501 */ 502 int str_prefix (const char *text, const char *prefix); 503 504 /* case insensitive version of str_prefix 505 * I 506 */ 507 int str_caseprefix (const char *text, const char *prefix); 508 509 /* create a key that is used by str_key_collate 510 * I 511 */ 512 char *str_create_key (const char *text, gboolean case_sen); 513 514 /* create a key that is used by str_key_collate 515 * should aware dot '.' in text 516 * I 517 */ 518 char *str_create_key_for_filename (const char *text, gboolean case_sen); 519 520 /* compare two string using LC_COLLATE, if is possible 521 * if case_sen is set, comparing is case sensitive, 522 * case_sen must be same for str_create_key, str_key_collate and str_release_key 523 * I 524 */ 525 int str_key_collate (const char *t1, const char *t2, gboolean case_sen); 526 527 /* release_key created by str_create_key, only right way to release key 528 * I 529 */ 530 void str_release_key (char *key, gboolean case_sen); 531 532 /* return TRUE if codeset_name is utf8 or utf-8 533 * I 534 */ 535 gboolean str_isutf8 (const char *codeset_name); 536 537 const char *str_detect_termencoding (void); 538 539 int str_verscmp (const char *s1, const char *s2); 540 541 /* Like filevercmp, except compare the byte arrays a (of length alen) and b (of length blen) 542 so that a and b can contain '\0', which sorts just before '\1'. But if alen is -1 treat 543 a as a string terminated by '\0', and similarly for blen. 544 */ 545 int filenvercmp (char const *a, ssize_t alen, char const *b, ssize_t blen); 546 547 548 /* return how many lines and columns will text occupy on terminal 549 */ 550 void str_msg_term_size (const char *text, int *lines, int *columns); 551 552 /** 553 * skip first needle's in haystack 554 * 555 * @param haystack pointer to string 556 * @param needle pointer to string 557 * @param skip_count skip first bytes 558 * 559 * @return pointer to skip_count+1 needle (or NULL if not found). 560 */ 561 562 char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count); 563 564 char *str_replace_all (const char *haystack, const char *needle, const char *replacement); 565 566 GPtrArray *str_tokenize (const char *string); 567 568 strtol_error_t xstrtoumax (const char *nptr, char **endptr, int base, uintmax_t * val, 569 const char *valid_suffixes); 570 uintmax_t parse_integer (const char *str, gboolean * invalid); 571 572 char *str_escape (const char *src, gsize src_len, const char *escaped_chars, 573 gboolean escape_non_printable); 574 char *str_unescape (const char *src, gsize src_len, const char *unescaped_chars, 575 gboolean unescape_non_printable); 576 char *str_shell_unescape (const char *text); 577 char *str_shell_escape (const char *text); 578 579 char *str_glob_escape (const char *text); 580 char *str_glob_unescape (const char *text); 581 582 char *str_regex_escape (const char *text); 583 char *str_regex_unescape (const char *text); 584 585 gboolean str_is_char_escaped (const char *start, const char *current); 586 587 /* --------------------------------------------------------------------------------------------- */ 588 /*** inline functions ****************************************************************************/ 589 /* --------------------------------------------------------------------------------------------- */ 590 591 static inline void 592 str_replace (char *s, char from, char to) 593 { 594 for (; *s != '\0'; s++) 595 { 596 if (*s == from) 597 *s = to; 598 } 599 } 600 601 /* --------------------------------------------------------------------------------------------- */ 602 /* 603 * strcpy is unsafe on overlapping memory areas, so define memmove-alike 604 * string function. 605 * Have sense only when: 606 * * dest <= src 607 * AND 608 * * dest and str are pointers to one object (as Roland Illig pointed). 609 * 610 * We can't use str*cpy funs here: 611 * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294 612 * 613 * @param dest pointer to string 614 * @param src pointer to string 615 * 616 * @return newly allocated string 617 * 618 */ 619 620 static inline char * 621 str_move (char *dest, const char *src) /* */ 622 { 623 size_t n; 624 625 g_assert (dest <= src); 626 627 n = strlen (src) + 1; /* + '\0' */ 628 629 return (char *) memmove (dest, src, n); 630 } 631 632 /* --------------------------------------------------------------------------------------------- */ 633 /* Compare version strings: 634 635 Compare strings a and b as file names containing version numbers, and return an integer 636 that is negative, zero, or positive depending on whether a compares less than, equal to, 637 or greater than b. 638 639 Use the following version sort algorithm: 640 641 1. Compare the strings' maximal-length non-digit prefixes lexically. 642 If there is a difference return that difference. 643 Otherwise discard the prefixes and continue with the next step. 644 645 2. Compare the strings' maximal-length digit prefixes, using numeric comparison 646 of the numbers represented by each prefix. (Treat an empty prefix as zero; this can 647 happen only at string end.) 648 If there is a difference, return that difference. 649 Otherwise discard the prefixes and continue with the next step. 650 651 3. If both strings are empty, return 0. Otherwise continue with step 1. 652 653 In version sort, lexical comparison is left to right, byte by byte, using the byte's numeric 654 value (0-255), except that: 655 656 1. ASCII letters sort before other bytes. 657 2. A tilde sorts before anything, even an empty string. 658 659 In addition to the version sort rules, the following strings have special priority and sort 660 before all other strings (listed in order): 661 662 1. The empty string. 663 2. ".". 664 3. "..". 665 4. Strings starting with "." sort before other strings. 666 667 Before comparing two strings where both begin with non-".", or where both begin with "." 668 but neither is "." or "..", suffixes matching the C-locale extended regular expression 669 (\.[A-Za-z~][A-Za-z0-9~]*)*$ are removed and the strings compared without them, using version sort 670 without special priority; if they do not compare equal, this comparison result is used and 671 the suffixes are effectively ignored. Otherwise, the entire strings are compared using version sort. 672 When removing a suffix from a nonempty string, remove the maximal-length suffix such that 673 the remaining string is nonempty. 674 */ 675 676 static inline int 677 filevercmp (const char *s1, const char *s2) /* */ 678 { 679 return filenvercmp (s1, -1, s2, -1); 680 } 681 682 /* --------------------------------------------------------------------------------------------- */ 683 684 #endif /* MC_STRUTIL_H */