root/lib/strutil.h

/* [previous][next][first][last][top][bottom][index][help]  */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. str_printf
  2. str_move

   1 #ifndef MC_STRUTIL_H
   2 #define MC_STRUTIL_H
   3 
   4 #include "lib/global.h"         /* include glib.h */
   5 
   6 #include <sys/types.h>
   7 #include <inttypes.h>
   8 #include <string.h>
   9 
  10 /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
  11  * There are two sort of functions:
  12  * 1. functions for working with growing strings and conversion strings between
  13  *    different encodings.
  14  *    (implemented directly in strutil.c)
  15  * 2. functions, that hide differences between encodings derived from ASCII.
  16  *    (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c)
  17  * documentation is made for UTF-8 version of functions.
  18  */
  19 
  20 /* invalid strings
  21  * functions that work with invalid strings are marked with "I"
  22  * in the documentation.
  23  * invalid bytes of a string are handled as one-byte characters with width 1; they
  24  * are displayed as question marks. I-marked comparing functions try to keep
  25  * the original value of these bytes.
  26  */
  27 
  28 /* combining characters
  29  * displaying: all are handled as zero-width characters, except a combining character
  30  * at the beginning of a string; this character has width one (a space is added before it),
  31  * so str_term_width is not good for computing the width of single characters
  32  * (it never returns zero, except for an empty string).
  33  * for compatibility, strings are composed before displaying.
  34  * comparing: all strings are decomposed before comparing. n-compare
  35  * functions do not work as usual, because equal strings do not have to be
  36  * the same length in UTF-8. So they return 0 if one string is a prefix of the other
  37  * one.
  38  * str_prefix is used to determine how many characters from one string are
  39  * a prefix of the second string. However, str_prefix returns the number of characters in
  40  * decomposed form. (used in do_search (screen.c))
  41  */
  42 
  43 /*** typedefs(not structures) and defined constants **********************************************/
  44 
  45 #define IS_FIT(x) ((x) & 0x0010)       /* non-zero when the "fit" bit (0x0010) is set in x */
  46 #define MAKE_FIT(x) ((x) | 0x0010)     /* x with the "fit" bit set */
  47 #define HIDE_FIT(x) ((x) & 0x000f)     /* keeps only the low four bits of x (drops the "fit" bit) */
  48 
     /* The "fit" bit is the one set in the J_..._FIT members of align_crt_t (0x11 .. 0x14). */
     /* INVALID_CONV: the GIConv value that the comments on str_crt_conv_from and
      * str_crt_conv_to in this header give as the result for an unsupported encoding. */
  49 #define INVALID_CONV ((GIConv) (-1))
  50 
  51 /*** enums ***************************************************************************************/
  52 
  53 /* results of conversion functions (the type returned by str_convert, str_nconvert,
  54  * str_vfs_convert_from, str_vfs_convert_to and str_translate_char) */
  55 typedef enum
  56 {
  57     /* success means that the conversion has been finished successfully
  58      */
  59     ESTR_SUCCESS = 0,
  60     /* problem means that not every character was successfully converted (they are
  61      * replaced with a question mark), so it is impossible to convert the string back.
  62      */
  63     ESTR_PROBLEM = 1,
  64     /* failure means that conversion is not possible (example: wrong encoding
  65      * of input string)
  66      */
  67     ESTR_FAILURE = 2
  68 } estr_t;
  69 
  70 /* alignment of strings on terminal (type of the just_mode argument of str_fit_to_term).
  71  * each J_..._FIT value is the matching plain value with bit 0x10 set (see IS_FIT/MAKE_FIT) */
  72 typedef enum
  73 {
  74     J_LEFT = 0x01,
  75     J_RIGHT = 0x02,
  76     J_CENTER = 0x03,
  77     /* if there is enough space for the string on the terminal, the
  78      * string is centered, otherwise it is aligned to the left */
  79     J_CENTER_LEFT = 0x04,
  80     /* fit alignment: if the string is too long, it is truncated with '~' */
  81     J_LEFT_FIT = 0x11,
  82     J_RIGHT_FIT = 0x12,
  83     J_CENTER_FIT = 0x13,
  84     J_CENTER_LEFT_FIT = 0x14
  85 } align_crt_t;
  86 
  87 /* string-to-integer parsing results (return type of xstrtoumax)
  88  */
  89 typedef enum
  90 {
  91     LONGINT_OK = 0,
  92 
  93     /* These two values can be ORed together, to indicate that both errors occurred. */
  94     LONGINT_OVERFLOW = 1,
  95     LONGINT_INVALID_SUFFIX_CHAR = 2,
  96     /* the ORed combination of the two values above, given its own name (value 3) */
  97     LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW = (LONGINT_INVALID_SUFFIX_CHAR | LONGINT_OVERFLOW),
  98     LONGINT_INVALID = 4
  99 } strtol_error_t;
 100 
 101 /*** structures declarations (and typedefs of structures)*****************************************/
 102 
 103 /* all functions in str_class must be defined for every encoding.
      *
      * Table of function pointers, one member per operation; str_utf8_init, str_8bit_init
      * and str_ascii_init (declared further down in this header) each return a
      * struct str_class by value.
      *
      * NOTE(review): the I markers (see the "invalid strings" note near the top of this
      * header) look shifted by one line: each marker sits at the start of the line FOLLOWING
      * the member it appears to belong to. Read that way they agree with the "I" notes on the
      * corresponding str_ prototypes, e.g. the marker in front of cnext_char would belong to
      * is_valid_char, while cnext_char itself (no marker on the line after it) is unmarked,
      * like str_next_char. The only mismatch seen is release_key, the last member, which has
      * no following line to carry its marker. Confirm before relying on this reading.
      */
 104 struct str_class
 105 {
 106     /* *INDENT-OFF* */
 107     gchar *(*conv_gerror_message) (GError * error, const char *def_msg);
 108       /*I*/ estr_t (*vfs_convert_to) (GIConv coder, const char *string, int size, GString * buffer);
 109       /*I*/ void (*insert_replace_char) (GString * buffer);
 110     gboolean (*is_valid_string) (const char *);
 111       /*I*/ int (*is_valid_char) (const char *, size_t);
 112       /*I*/ void (*cnext_char) (const char **);
 113     void (*cprev_char) (const char **);
 114     void (*cnext_char_safe) (const char **);
 115       /*I*/ void (*cprev_char_safe) (const char **);
 116       /*I*/ int (*cnext_noncomb_char) (const char **text);
 117       /*I*/ int (*cprev_noncomb_char) (const char **text, const char *begin);
 118       /*I*/ gboolean (*char_isspace) (const char *);
 119       /*I*/ gboolean (*char_ispunct) (const char *);
 120       /*I*/ gboolean (*char_isalnum) (const char *);
 121       /*I*/ gboolean (*char_isdigit) (const char *);
 122       /*I*/ gboolean (*char_isprint) (const char *);
 123       /*I*/ gboolean (*char_iscombiningmark) (const char *);
 124       /*I*/ int (*length) (const char *);
 125       /*I*/ int (*length2) (const char *, int);
 126       /*I*/ int (*length_noncomb) (const char *);
 127       /*I*/ gboolean (*char_toupper) (const char *, char **, size_t *);
 128     gboolean (*char_tolower) (const char *, char **, size_t *);
 129     void (*fix_string) (char *);
 130       /*I*/ const char *(*term_form) (const char *);
 131       /*I*/ const char *(*fit_to_term) (const char *, int, align_crt_t);
 132       /*I*/ const char *(*term_trim) (const char *text, int width);
 133       /*I*/ const char *(*term_substring) (const char *, int, int);
 134       /*I*/ int (*term_width1) (const char *);
 135       /*I*/ int (*term_width2) (const char *, size_t);
 136       /*I*/ int (*term_char_width) (const char *);
 137       /*I*/ const char *(*trunc) (const char *, int);
 138       /*I*/ int (*offset_to_pos) (const char *, size_t);
 139       /*I*/ int (*column_to_pos) (const char *, size_t);
 140       /*I*/ char *(*create_search_needle) (const char *, gboolean);
 141     void (*release_search_needle) (char *, gboolean);
 142     const char *(*search_first) (const char *, const char *, gboolean);
 143     const char *(*search_last) (const char *, const char *, gboolean);
 144     int (*compare) (const char *, const char *);
 145       /*I*/ int (*ncompare) (const char *, const char *);
 146       /*I*/ int (*casecmp) (const char *, const char *);
 147       /*I*/ int (*ncasecmp) (const char *, const char *);
 148       /*I*/ int (*prefix) (const char *, const char *);
 149       /*I*/ int (*caseprefix) (const char *, const char *);
 150       /*I*/ char *(*create_key) (const char *text, gboolean case_sen);
 151       /*I*/ char *(*create_key_for_filename) (const char *text, gboolean case_sen);
 152       /*I*/ int (*key_collate) (const char *t1, const char *t2, gboolean case_sen);
 153       /*I*/ void (*release_key) (char *key, gboolean case_sen);
 154     /* *INDENT-ON* */
 155 };
 156 
 157 /*** global variables defined in .c file *********************************************************/
 158 
 159 /* standard convertors */
 160 extern GIConv str_cnv_to_term;
 161 extern GIConv str_cnv_from_term;
 162 /* from terminal encoding to terminal encoding */
 163 extern GIConv str_cnv_not_convert;
 164 
 165 /*** declarations of public functions ************************************************************/
 166 
 167 struct str_class str_utf8_init (void);
 168 struct str_class str_8bit_init (void);
 169 struct str_class str_ascii_init (void);
 170 
 171 /* create convertor from "from_enc" to terminal encoding
 172  * if "from_enc" is not supported return INVALID_CONV 
 173  */
 174 GIConv str_crt_conv_from (const char *);
 175 
 176 /* create convertor from terminal encoding to "to_enc"
 177  * if "to_enc" is not supported return INVALID_CONV 
 178  */
 179 GIConv str_crt_conv_to (const char *);
 180 
 181 /* close convertor, do not close str_cnv_to_term, str_cnv_from_term, 
 182  * str_cnv_not_convert 
 183  */
 184 void str_close_conv (GIConv);
 185 
 186 /* return one of the unused buffers (.used == 0) or create a new one
 187  * the returned buffer has .used set to 1
 188  */
 189 
 190 /* convert string using coder, result of conversion is appended at end of buffer
 191  * return ESTR_SUCCESS if there was no problem.
 192  * otherwise return  ESTR_PROBLEM or ESTR_FAILURE
 193  */
 194 estr_t str_convert (GIConv, const char *, GString *);
 195 estr_t str_nconvert (GIConv, const char *, int, GString *);
 196 
 197 /* convert GError message (which is in UTF-8) to terminal charset
 198  * def_msg is used if the result of the error message conversion is ESTR_FAILURE
 199  * returns a newly allocated null-terminated string, which needs to be freed
 200  * I
 201  */
 202 gchar *str_conv_gerror_message (GError * error, const char *def_msg);
 203 
 204 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
 205  * result to original string. (so no replace with questionmark)
 206  * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
 207  * so is possible to show file, that is not valid in terminal encoding
 208  */
 209 estr_t str_vfs_convert_from (GIConv, const char *, GString *);
 210 
 211 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
 212  * does replace with questionmark 
 213  * I
 214  */
 215 estr_t str_vfs_convert_to (GIConv, const char *, int, GString *);
 216 
 217 /* printf function for str_buffer, append result of printf at the end of buffer
 218  */
 219 /* *INDENT-OFF* */
 220 void str_printf (GString * buffer, const char *format, ...) G_GNUC_PRINTF (2, 3);
     /* [previous][next][first][last][top][bottom][index][help]  */
 221 /* *INDENT-ON* */
 222 
 223 /* add standard replacement character in terminal encoding
 224  */
 225 void str_insert_replace_char (GString *);
 226 
 227 /* init strings and set terminal encoding,
 228  * if is termenc NULL, detect terminal encoding
 229  * create all str_cnv_* and set functions for terminal encoding
 230  */
 231 void str_init_strings (const char *termenc);
 232 
 233 /* free all str_buffer and all str_cnv_*
 234  */
 235 void str_uninit_strings (void);
 236 
 237 /* try to convert the characters in ch to output using conv
 238  * ch_size is the size of ch; it can be (size_t)(-1) (-1 only for ASCII
 239  *     compatible encodings, for others it must be set)
 240  * return ESTR_SUCCESS if the conversion was successful,
 241  * ESTR_PROBLEM if ch contains only part of a character,
 242  * ESTR_FAILURE if conversion is not possible
 243  */
 244 estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size,
 245                            char *output, size_t out_size);
 246 
 247 /* test, if text is valid in terminal encoding
 248  * I
 249  */
 250 gboolean str_is_valid_string (const char *text);
 251 
 252 /* test, if first char of ch is valid
 253  * size, how many bytes characters occupied, could be (size_t)(-1)
 254  * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of 
 255  * multibyte character 
 256  * I
 257  */
 258 int str_is_valid_char (const char *ch, size_t size);
 259 
 260 /* return next characters after text, do not call on the end of string
 261  */
 262 char *str_get_next_char (char *text);
 263 const char *str_cget_next_char (const char *text);
 264 
 265 /* return previous characters before text, do not call on the start of strings
 266  */
 267 char *str_get_prev_char (char *text);
 268 const char *str_cget_prev_char (const char *text);
 269 
 270 /* set text to next characters, do not call on the end of string
 271  */
 272 void str_next_char (char **text);
 273 void str_cnext_char (const char **text);
 274 
 275 /* set text to previous characters, do not call on the start of strings
 276  */
 277 void str_prev_char (char **text);
 278 void str_cprev_char (const char **text);
 279 
 280 /* return next characters after text, do not call on the end of string
 281  * works with invalid string 
 282  * I
 283  */
 284 char *str_get_next_char_safe (char *text);
 285 const char *str_cget_next_char_safe (const char *text);
 286 
 287 /* return previous characters before text, do not call on the start of strings
 288  * works with invalid string 
 289  * I
 290  */
 291 char *str_get_prev_char_safe (char *text);
 292 const char *str_cget_prev_char_safe (const char *text);
 293 
 294 /* set text to next characters, do not call on the end of string
 295  * works with invalid string 
 296  * I
 297  */
 298 void str_next_char_safe (char **text);
 299 void str_cnext_char_safe (const char **text);
 300 
 301 /* set text to previous characters, do not call on the start of strings
 302  * works with invalid string 
 303  * I
 304  */
 305 void str_prev_char_safe (char **text);
 306 void str_cprev_char_safe (const char **text);
 307 
 308 /* set text to next noncombining characters, check the end of text
 309  * return how many characters was skipped
 310  * works with invalid string 
 311  * I
 312  */
 313 int str_next_noncomb_char (char **text);
 314 int str_cnext_noncomb_char (const char **text);
 315 
 316 /* set text to previous noncombining characters, search stop at begin 
 317  * return how many characters was skipped
 318  * works with invalid string 
 319  * I
 320  */
 321 int str_prev_noncomb_char (char **text, const char *begin);
 322 int str_cprev_noncomb_char (const char **text, const char *begin);
 323 
 324 /* if first characters in ch is space, tabulator  or new lines
 325  * I
 326  */
 327 gboolean str_isspace (const char *ch);
 328 
 329 /* if first characters in ch is punctuation or symbol
 330  * I
 331  */
 332 gboolean str_ispunct (const char *ch);
 333 
 334 /* if first characters in ch is alphanum
 335  * I
 336  */
 337 gboolean str_isalnum (const char *ch);
 338 
 339 /* if first characters in ch is digit
 340  * I
 341  */
 342 gboolean str_isdigit (const char *ch);
 343 
 344 /* if first characters in ch is printable
 345  * I
 346  */
 347 gboolean str_isprint (const char *ch);
 348 
 349 /* if the first character in ch is a combining mark (only in utf-8)
 350  * combining marks are assumed to be zero width
 351  * I
 352  */
 353 gboolean str_iscombiningmark (const char *ch);
 354 
 355 /* write the upper-case form of the first character in ch into out
 356  * decrease remain by the size of the returned character
 357  * if out is not big enough, do nothing
 358  */
 359 gboolean str_toupper (const char *ch, char **out, size_t * remain);
 360 
 361 /* write the lower-case form of the first character in ch into out
 362  * decrease remain by the size of the returned character
 363  * if out is not big enough, do nothing
 364  */
 365 gboolean str_tolower (const char *ch, char **out, size_t * remain);
 366 
 367 /* return length of text in characters
 368  * I
 369  */
 370 int str_length (const char *text);
 371 
 372 /* return length of text in characters, limit to size
 373  * I
 374  */
 375 int str_length2 (const char *text, int size);
 376 
 377 /* return length of one char
 378  * I
 379  */
 380 int str_length_char (const char *);
 381 
 382 /* return length of text in characters, count only noncombining characters
 383  * I
 384  */
 385 int str_length_noncomb (const char *text);
 386 
 387 /* replace all invalid characters in text with questionmark
 388  * after return, text is valid string in terminal encoding
 389  * I
 390  */
 391 void str_fix_string (char *text);
 392 
 393 /* replace all invalid characters in text with questionmark
 394  * replace all unprintable characters with '.'
 395  * return static allocated string, "text" is not changed
 396  * returned string do not need to be freed
 397  * I
 398  */
 399 const char *str_term_form (const char *text);
 400 
 401 /* like str_term_form, but text can be aligned to width
 402  * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
 403  * result is completed with spaces to width
 404  * I
 405  */
 406 const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode);
 407 
 408 /* like str_term_form, but when text is wider than width, three dots are
 409  * inserted at begin and result is completed with suffix of text
 410  * no additional spaces are inserted
 411  * I
 412  */
 413 const char *str_term_trim (const char *text, int width);
 414 
 415 
 416 /* like str_term_form, but return only specified substring
 417  * start - column (position) on terminal, where substring begin
 418  * result is completed with spaces to width
 419  * I
 420  */
 421 const char *str_term_substring (const char *text, int start, int width);
 422 
 423 /* return width, that will be text occupied on terminal
 424  * I
 425  */
 426 int str_term_width1 (const char *text);
 427 
 428 /* return width, that will be text occupied on terminal
 429  * text is limited by length in characters
 430  * I
 431  */
 432 int str_term_width2 (const char *text, size_t length);
 433 
 434 /* return width, that will be character occupied on terminal
 435  * combining characters are always zero width
 436  * I
 437  */
 438 int str_term_char_width (const char *text);
 439 
 440 /* convert position in characters to position in bytes 
 441  * I
 442  */
 443 int str_offset_to_pos (const char *text, size_t length);
 444 
 445 /* convert position on terminal to position in characters
 446  * I
 447  */
 448 int str_column_to_pos (const char *text, size_t pos);
 449 
 450 /* like str_fit_to_term with just_mode = J_LEFT_FIT,
 451  * but do not insert additional spaces
 452  * I
 453  */
 454 const char *str_trunc (const char *text, int width);
 455 
 456 /* create needle that will be searched for in str_search_first/last,
 457  * so the needle can be reused
 458  * in UTF-8 return normalized form of needle
 459  */
 460 char *str_create_search_needle (const char *needle, gboolean case_sen);
 461 
 462 /* free needle returned by str_create_search_needle
 463  */
 464 void str_release_search_needle (char *needle, gboolean case_sen);
 465 
 466 /* search for first occurrence of search in text
 467  */
 468 const char *str_search_first (const char *text, const char *needle, gboolean case_sen);
 469 
 470 /* search for last occurrence of search in text
 471  */
 472 const char *str_search_last (const char *text, const char *needle, gboolean case_sen);
 473 
 474 /* case sensitive compare two strings
 475  * I
 476  */
 477 int str_compare (const char *t1, const char *t2);
 478 
 479 /* case sensitive compare two strings
 480  * if one string is prefix of the other string, return 0
 481  * I
 482  */
 483 int str_ncompare (const char *t1, const char *t2);
 484 
 485 /* case insensitive compare two strings
 486  * I
 487  */
 488 int str_casecmp (const char *t1, const char *t2);
 489 
 490 /* case insensitive compare two strings
 491  * if one string is prefix of the other string, return 0
 492  * I
 493  */
 494 int str_ncasecmp (const char *t1, const char *t2);
 495 
 496 /* return how many bytes are the same from the start in text and prefix
 497  * both strings are decomposed before comparing and the return value is counted
 498  * in decomposed form, too. calling with prefix, prefix, you get the size in bytes
 499  * of prefix in decomposed form.
 500  * I
 501  */
 502 int str_prefix (const char *text, const char *prefix);
 503 
 504 /* case insensitive version of str_prefix
 505  * I
 506  */
 507 int str_caseprefix (const char *text, const char *prefix);
 508 
 509 /* create a key that is used by str_key_collate
 510  * I
 511  */
 512 char *str_create_key (const char *text, gboolean case_sen);
 513 
 514 /* create a key that is used by str_key_collate
 515  * should be aware of dot '.' in text
 516  * I
 517  */
 518 char *str_create_key_for_filename (const char *text, gboolean case_sen);
 519 
 520 /* compare two string using LC_COLLATE, if is possible
 521  * if case_sen is set, comparing is case sensitive,
 522  * case_sen must be same for str_create_key, str_key_collate and str_release_key
 523  * I
 524  */
 525 int str_key_collate (const char *t1, const char *t2, gboolean case_sen);
 526 
 527 /* release key created by str_create_key, the only right way to release the key
 528  * I
 529  */
 530 void str_release_key (char *key, gboolean case_sen);
 531 
 532 /* return TRUE if codeset_name is utf8 or utf-8
 533  * I
 534  */
 535 gboolean str_isutf8 (const char *codeset_name);
 536 
 537 const char *str_detect_termencoding (void);
 538 
 539 int str_verscmp (const char *s1, const char *s2);
 540 
 541 /* Compare version strings:
 542 
 543    This function compares strings s1 and s2:
 544    1) By PREFIX in the same way as strcmp.
 545    2) Then by VERSION (most similarly to version compare of Debian's dpkg).
 546    Leading zeros in version numbers are ignored.
 547    3) If both (PREFIX and  VERSION) are equal, strcmp function is used for
 548    comparison. So this function can return 0 if (and only if) strings s1
 549    and s2 are identical.
 550 
 551    It returns number > 0 for s1 > s2, 0 for s1 == s2 and number < 0 for s1 < s2.
 552 
 553    This function compares strings, in a way that if VER1 and VER2 are version
 554    numbers and PREFIX and SUFFIX (SUFFIX defined as (\.[A-Za-z~][A-Za-z0-9~]*)*)
 555    are strings then VER1 < VER2 implies filevercmp (PREFIX VER1 SUFFIX,
 556    PREFIX VER2 SUFFIX) < 0.
 557 
 558    This function is intended to be a replacement for strverscmp.
 559  */
 560 int filevercmp (const char *s1, const char *s2);
 561 
 562 
 563 /* return how many lines and columns will text occupy on terminal
 564  */
 565 void str_msg_term_size (const char *text, int *lines, int *columns);
 566 
 567 /**
 568  * skip first needle's in haystack
 569  *
 570  * @param haystack pointer to string
 571  * @param needle pointer to string
 572  * @param skip_count skip first bytes
 573  *
 574  * @return pointer to skip_count+1 needle (or NULL if not found).
 575  */
 576 
 577 char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count);
 578 
 579 char *str_replace_all (const char *haystack, const char *needle, const char *replacement);
 580 
 581 strtol_error_t xstrtoumax (const char *s, char **ptr, int base, uintmax_t * val,
 582                            const char *valid_suffixes);
 583 uintmax_t parse_integer (const char *str, gboolean * invalid);
 584 
 585 /* --------------------------------------------------------------------------------------------- */
 586 /*** inline functions ****************************************************************************/
 587 /* --------------------------------------------------------------------------------------------- */
 588 
     /*
      * Replace, in place, every char in s that is equal to "from" with "to".
      * The scan starts at s and stops at the string's terminating '\0'.
      *
      * @param s string to modify
      * @param from character to look for
      * @param to character stored in place of each match
      */
 589 static inline void
 590 str_replace (char *s, char from, char to)
 591 {
 592     for (; *s != '\0'; s++)
 593     {
 594         if (*s == from)
 595             *s = to;
 596     }
 597 }
 598 
 599 /* --------------------------------------------------------------------------------------------- */
 600 /*
 601  * strcpy is unsafe on overlapping memory areas, so define memmove-alike
 602  * string function.
 603  * Makes sense only when:
 604  *  * dest <= src
 605  *   AND
 606  *  * dest and src are pointers into one object (as Roland Illig pointed out).
 607  *
 608  * We can't use str*cpy funs here:
 609  * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294
 610  *
 611  * @param dest pointer to string
 612  * @param src pointer to string
 613  *
 614  * @return the result of memmove, i.e. dest; nothing is allocated
 615  *
 616  */
 617 
 618 static inline char *
 619 str_move (char *dest, const char *src)
     /* [previous][next][first][last][top][bottom][index][help]  */
 620 {
 621     size_t n;
 622 
 623     g_assert (dest <= src);     /* checks the "dest <= src" precondition from the header comment */
 624 
 625     n = strlen (src) + 1;       /* + '\0' */
 626 
 627     return (char *) memmove (dest, src, n);
 628 }
 629 
 630 /* --------------------------------------------------------------------------------------------- */
 631 
 632 #endif /* MC_STRUTIL_H */

/* [previous][next][first][last][top][bottom][index][help]  */