This source file includes the following definitions.
- mc_search__regex_str_append_if_special
- mc_search__cond_struct_new_regex_hex_add
- mc_search__cond_struct_new_regex_accum_append
- mc_search__cond_struct_new_regex_ci_str
- mc_search__g_regex_match_full_safe
- mc_search__regex_found_cond_one
- mc_search__regex_found_cond
- mc_search_regex__get_max_num_of_replace_tokens
- mc_search_regex__get_token_by_num
- mc_search_regex__replace_handle_esc_seq
- mc_search_regex__process_replace_str
- mc_search_regex__process_append_str
- mc_search_regex__process_escape_sequence
- mc_search__cond_struct_new_init_regex
- mc_search__run_regex
- mc_search_regex_prepare_replace_str
- mc_search_get_line_type
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include <config.h>
30
31 #include <stdlib.h>
32
33 #include "lib/global.h"
34 #include "lib/strutil.h"
35 #include "lib/search.h"
36 #include "lib/util.h"
37
38 #include "internal.h"
39
40
41
42
43
/* Negative result codes of mc_search_regex__process_replace_str();
   a non-negative result is used by the caller as a capture-group index. */
#define REPLACE_PREPARE_T_NOTHING_SPECIAL (-1)
#define REPLACE_PREPARE_T_REPLACE_FLAG (-2)
#define REPLACE_PREPARE_T_ESCAPE_SEQ (-3)

/* Case transformations requested by \u, \l, \U, \L in a replacement string.
   The values are distinct bits and are combined with |, &, ~ by the code below. */
typedef enum
{
    REPLACE_T_NO_TRANSFORM = 0,
    REPLACE_T_UPP_TRANSFORM_CHAR = 1 << 0,      /* \u: upper-case the next character only */
    REPLACE_T_LOW_TRANSFORM_CHAR = 1 << 1,      /* \l: lower-case the next character only */
    REPLACE_T_UPP_TRANSFORM = 1 << 2,   /* \U: upper-case until cancelled */
    REPLACE_T_LOW_TRANSFORM = 1 << 3    /* \L: lower-case until cancelled */
} replace_transform_type_t;
58
59
60
61
62
63
64
65
66
67 static gboolean
68 mc_search__regex_str_append_if_special (GString *copy_to, const GString *regex_str, gsize *offset)
69 {
70 const char *special_chars[] = {
71 "\\a",
72 "\\b",
73 "\\B",
74 "\\c",
75 "\\C",
76 "\\d",
77 "\\D",
78 "\\e",
79 "\\E",
80 "\\f",
81 "\\l",
82 "\\L",
83 "\\n",
84 "\\Q",
85 "\\r",
86 "\\s",
87 "\\S",
88 "\\t",
89 "\\u",
90 "\\U",
91 "\\w",
92 "\\W",
93 "\\x",
94 "\\X",
95 NULL,
96 };
97
98 char *tmp_regex_str;
99 const char **spec_chr;
100
101 tmp_regex_str = &(regex_str->str[*offset]);
102
103 for (spec_chr = special_chars; *spec_chr != NULL; spec_chr++)
104 {
105 gsize spec_chr_len;
106
107 spec_chr_len = strlen (*spec_chr);
108
109 if (strncmp (tmp_regex_str, *spec_chr, spec_chr_len) == 0
110 && !str_is_char_escaped (regex_str->str, tmp_regex_str))
111 {
112 if (strncmp ("\\x", *spec_chr, spec_chr_len) == 0)
113 {
114 if (tmp_regex_str[spec_chr_len] != '{')
115 spec_chr_len += 2;
116 else
117 {
118 while ((spec_chr_len < regex_str->len - *offset)
119 && tmp_regex_str[spec_chr_len] != '}')
120 spec_chr_len++;
121 if (tmp_regex_str[spec_chr_len] == '}')
122 spec_chr_len++;
123 }
124 }
125 g_string_append_len (copy_to, tmp_regex_str, spec_chr_len);
126 *offset += spec_chr_len;
127 return TRUE;
128 }
129 }
130
131 return FALSE;
132 }
133
134
135
136 static void
137 mc_search__cond_struct_new_regex_hex_add (const char *charset, GString *str_to,
138 const GString *one_char)
139 {
140 GString *upp, *low;
141 gsize loop;
142
143 upp = mc_search__toupper_case_str (charset, one_char);
144 low = mc_search__tolower_case_str (charset, one_char);
145
146 for (loop = 0; loop < upp->len; loop++)
147 {
148 gchar tmp_str[10 + 1];
149 gint tmp_len;
150
151 if (loop >= low->len || upp->str[loop] == low->str[loop])
152 tmp_len =
153 g_snprintf (tmp_str, sizeof (tmp_str), "\\x%02X", (unsigned char) upp->str[loop]);
154 else
155 tmp_len = g_snprintf (tmp_str, sizeof (tmp_str), "[\\x%02X\\x%02X]",
156 (unsigned char) upp->str[loop], (unsigned char) low->str[loop]);
157
158 g_string_append_len (str_to, tmp_str, tmp_len);
159 }
160
161 g_string_free (upp, TRUE);
162 g_string_free (low, TRUE);
163 }
164
165
166
167 static void
168 mc_search__cond_struct_new_regex_accum_append (const char *charset, GString *str_to,
169 GString *str_from)
170 {
171 GString *recoded_part;
172 gsize loop = 0;
173
174 recoded_part = g_string_sized_new (32);
175
176 while (loop < str_from->len)
177 {
178 GString *one_char;
179 gboolean just_letters;
180
181 one_char = mc_search__get_one_symbol (charset, str_from->str + loop,
182 MIN (str_from->len - loop, 6), &just_letters);
183
184 if (one_char->len == 0)
185 loop++;
186 else
187 {
188 loop += one_char->len;
189
190 if (just_letters)
191 mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char);
192 else
193 g_string_append_len (recoded_part, one_char->str, one_char->len);
194 }
195
196 g_string_free (one_char, TRUE);
197 }
198
199 g_string_append_len (str_to, recoded_part->str, recoded_part->len);
200 g_string_free (recoded_part, TRUE);
201 g_string_set_size (str_from, 0);
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 static GString *
220 mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString *astr)
221 {
222 GString *accumulator, *spec_char, *ret_str;
223 gsize loop;
224
225 ret_str = g_string_sized_new (64);
226 accumulator = g_string_sized_new (64);
227 spec_char = g_string_sized_new (64);
228 loop = 0;
229
230 while (loop < astr->len)
231 {
232 if (mc_search__regex_str_append_if_special (spec_char, astr, &loop))
233 {
234 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
235 g_string_append_len (ret_str, spec_char->str, spec_char->len);
236 g_string_set_size (spec_char, 0);
237 continue;
238 }
239
240 if (astr->str[loop] == '[' && !str_is_char_escaped (astr->str, &(astr->str[loop])))
241 {
242 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
243
244 while (loop < astr->len
245 && !(astr->str[loop] == ']'
246 && !str_is_char_escaped (astr->str, &(astr->str[loop]))))
247 {
248 g_string_append_c (ret_str, astr->str[loop]);
249 loop++;
250 }
251
252 g_string_append_c (ret_str, astr->str[loop]);
253 loop++;
254 continue;
255 }
256
257
258
259 g_string_append_c (accumulator, astr->str[loop]);
260 loop++;
261 }
262 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
263
264 g_string_free (accumulator, TRUE);
265 g_string_free (spec_char, TRUE);
266
267 return ret_str;
268 }
269
270
271
272 #ifdef SEARCH_TYPE_GLIB
273
274
275
276
277 static gboolean
278 mc_search__g_regex_match_full_safe (const GRegex *regex, const gchar *string, gssize string_len,
279 gint start_position, GRegexMatchFlags match_options,
280 GMatchInfo **match_info, GError **error)
281 {
282 char *string_safe, *p, *end;
283 gboolean ret;
284
285 if (string_len < 0)
286 string_len = strlen (string);
287
288 if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW)
289 || g_utf8_validate (string, string_len, NULL))
290 {
291 return g_regex_match_full (regex, string, string_len, start_position, match_options,
292 match_info, error);
293 }
294
295
296 p = string_safe = g_malloc (string_len + 1);
297 memcpy (string_safe, string, string_len);
298 string_safe[string_len] = '\0';
299 end = p + string_len;
300
301 while (p < end)
302 {
303 gunichar c = g_utf8_get_char_validated (p, -1);
304 if (c != (gunichar) (-1) && c != (gunichar) (-2))
305 {
306 p = g_utf8_next_char (p);
307 }
308 else
309 {
310
311
312
313 *p++ = '\0';
314 }
315 }
316
317 ret = g_regex_match_full (regex, string_safe, string_len, start_position, match_options,
318 match_info, error);
319 g_free (string_safe);
320 return ret;
321 }
322 #endif
323
324
325
326 static mc_search__found_cond_t
327 mc_search__regex_found_cond_one (mc_search_t *lc_mc_search, mc_search_regex_t *regex,
328 GString *search_str)
329 {
330 #ifdef SEARCH_TYPE_GLIB
331 GError *mcerror = NULL;
332
333 if (!mc_search__g_regex_match_full_safe (regex, search_str->str, search_str->len, 0,
334 G_REGEX_MATCH_NEWLINE_ANY,
335 &lc_mc_search->regex_match_info, &mcerror))
336 {
337 g_match_info_free (lc_mc_search->regex_match_info);
338 lc_mc_search->regex_match_info = NULL;
339 if (mcerror != NULL)
340 {
341 lc_mc_search->error = MC_SEARCH_E_REGEX;
342 g_free (lc_mc_search->error_str);
343 lc_mc_search->error_str =
344 str_conv_gerror_message (mcerror, _ ("Regular expression error"));
345 g_error_free (mcerror);
346 return COND__FOUND_ERROR;
347 }
348 return COND__NOT_FOUND;
349 }
350 lc_mc_search->num_results = g_match_info_get_match_count (lc_mc_search->regex_match_info);
351 #else
352
353 lc_mc_search->num_results =
354 # ifdef HAVE_PCRE2
355 pcre2_match (regex, (unsigned char *) search_str->str, search_str->len, 0, 0,
356 lc_mc_search->regex_match_info, NULL);
357 # else
358 pcre_exec (regex, lc_mc_search->regex_match_info, search_str->str, search_str->len, 0, 0,
359 lc_mc_search->iovector, MC_SEARCH__NUM_REPLACE_ARGS);
360 # endif
361 if (lc_mc_search->num_results < 0)
362 {
363 return COND__NOT_FOUND;
364 }
365 #endif
366 return COND__FOUND_OK;
367 }
368
369
370
371 static mc_search__found_cond_t
372 mc_search__regex_found_cond (mc_search_t *lc_mc_search, GString *search_str)
373 {
374 gsize loop1;
375
376 for (loop1 = 0; loop1 < lc_mc_search->prepared.conditions->len; loop1++)
377 {
378 mc_search_cond_t *mc_search_cond;
379 mc_search__found_cond_t ret;
380
381 mc_search_cond =
382 (mc_search_cond_t *) g_ptr_array_index (lc_mc_search->prepared.conditions, loop1);
383
384 if (!mc_search_cond->regex_handle)
385 continue;
386
387 ret = mc_search__regex_found_cond_one (lc_mc_search, mc_search_cond->regex_handle,
388 search_str);
389 if (ret != COND__NOT_FOUND)
390 return ret;
391 }
392 return COND__NOT_ALL_FOUND;
393 }
394
395
396
397 static int
398 mc_search_regex__get_max_num_of_replace_tokens (const gchar *str, gsize len)
399 {
400 int max_token = 0;
401 gsize loop;
402
403 for (loop = 0; loop < len - 1; loop++)
404 if (str[loop] == '\\' && g_ascii_isdigit (str[loop + 1]))
405 {
406 if (str_is_char_escaped (str, &str[loop]))
407 continue;
408 if (max_token < str[loop + 1] - '0')
409 max_token = str[loop + 1] - '0';
410 }
411 else if (str[loop] == '$' && str[loop + 1] == '{')
412 {
413 gsize tmp_len;
414
415 if (str_is_char_escaped (str, &str[loop]))
416 continue;
417
418 for (tmp_len = 0;
419 loop + tmp_len + 2 < len && (str[loop + 2 + tmp_len] & (char) 0xf0) == 0x30;
420 tmp_len++)
421 ;
422
423 if (str[loop + 2 + tmp_len] == '}')
424 {
425 int tmp_token;
426 char *tmp_str;
427
428 tmp_str = g_strndup (&str[loop + 2], tmp_len);
429 tmp_token = atoi (tmp_str);
430 if (max_token < tmp_token)
431 max_token = tmp_token;
432 g_free (tmp_str);
433 }
434 }
435
436 return max_token;
437 }
438
439
440
441 static char *
442 mc_search_regex__get_token_by_num (const mc_search_t *lc_mc_search, gsize lc_index)
443 {
444 int fnd_start = 0, fnd_end = 0;
445
446 #ifdef SEARCH_TYPE_GLIB
447 g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &fnd_start, &fnd_end);
448 #else
449 fnd_start = lc_mc_search->iovector[lc_index * 2 + 0];
450 fnd_end = lc_mc_search->iovector[lc_index * 2 + 1];
451 #endif
452
453 if (fnd_end == fnd_start)
454 return g_strdup ("");
455
456 return g_strndup (lc_mc_search->regex_buffer->str + fnd_start, fnd_end - fnd_start);
457 }
458
459
460
461 static gboolean
462 mc_search_regex__replace_handle_esc_seq (const GString *replace_str, const gsize current_pos,
463 gsize *skip_len, int *ret)
464 {
465 char *curr_str = &(replace_str->str[current_pos]);
466 char c = curr_str[1];
467
468 if (replace_str->len > current_pos + 2)
469 {
470 if (c == '{')
471 {
472 for (*skip_len = 2;
473 current_pos + *skip_len < replace_str->len && curr_str[*skip_len] >= '0'
474 && curr_str[*skip_len] <= '7';
475 (*skip_len)++)
476 ;
477
478 if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}')
479 {
480 (*skip_len)++;
481 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
482 return FALSE;
483 }
484 else
485 {
486 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
487 return TRUE;
488 }
489 }
490
491 if (c == 'x')
492 {
493 *skip_len = 2;
494 c = curr_str[2];
495 if (c == '{')
496 {
497 for (*skip_len = 3;
498 current_pos + *skip_len < replace_str->len
499 && g_ascii_isxdigit ((guchar) curr_str[*skip_len]);
500 (*skip_len)++)
501 ;
502
503 if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}')
504 {
505 (*skip_len)++;
506 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
507 return FALSE;
508 }
509 else
510 {
511 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
512 return TRUE;
513 }
514 }
515 else if (!g_ascii_isxdigit ((guchar) c))
516 {
517 *skip_len = 2;
518 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
519 return FALSE;
520 }
521 else
522 {
523 c = curr_str[3];
524 if (!g_ascii_isxdigit ((guchar) c))
525 *skip_len = 3;
526 else
527 *skip_len = 4;
528 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
529 return FALSE;
530 }
531 }
532 }
533
534 if (strchr ("ntvbrfa", c) != NULL)
535 {
536 *skip_len = 2;
537 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
538 return FALSE;
539 }
540 return TRUE;
541 }
542
543
544
545 static int
546 mc_search_regex__process_replace_str (const GString *replace_str, const gsize current_pos,
547 gsize *skip_len, replace_transform_type_t *replace_flags)
548 {
549 int ret = -1;
550 const char *curr_str = &(replace_str->str[current_pos]);
551
552 if (current_pos > replace_str->len)
553 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
554
555 *skip_len = 0;
556
557 if (replace_str->len > current_pos + 2 && curr_str[0] == '$' && curr_str[1] == '{'
558 && (curr_str[2] & (char) 0xf0) == 0x30)
559 {
560 char *tmp_str;
561
562 if (str_is_char_escaped (replace_str->str, curr_str))
563 {
564 *skip_len = 1;
565 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
566 }
567
568 for (*skip_len = 0; current_pos + *skip_len + 2 < replace_str->len
569 && (curr_str[2 + *skip_len] & (char) 0xf0) == 0x30;
570 (*skip_len)++)
571 ;
572
573 if (curr_str[2 + *skip_len] != '}')
574 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
575
576 tmp_str = g_strndup (curr_str + 2, *skip_len);
577 if (tmp_str == NULL)
578 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
579
580 ret = atoi (tmp_str);
581 g_free (tmp_str);
582
583 *skip_len += 3;
584 return ret;
585 }
586
587 if (curr_str[0] == '\\' && replace_str->len > current_pos + 1)
588 {
589 if (str_is_char_escaped (replace_str->str, curr_str))
590 {
591 *skip_len = 1;
592 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
593 }
594
595 if (g_ascii_isdigit (curr_str[1]))
596 {
597 ret = g_ascii_digit_value (curr_str[1]);
598 *skip_len = 2;
599 return ret;
600 }
601
602 if (!mc_search_regex__replace_handle_esc_seq (replace_str, current_pos, skip_len, &ret))
603 return ret;
604
605 ret = REPLACE_PREPARE_T_REPLACE_FLAG;
606 *skip_len += 2;
607
608 switch (curr_str[1])
609 {
610 case 'U':
611 *replace_flags |= REPLACE_T_UPP_TRANSFORM;
612 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM;
613 break;
614 case 'u':
615 *replace_flags |= REPLACE_T_UPP_TRANSFORM_CHAR;
616 break;
617 case 'L':
618 *replace_flags |= REPLACE_T_LOW_TRANSFORM;
619 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM;
620 break;
621 case 'l':
622 *replace_flags |= REPLACE_T_LOW_TRANSFORM_CHAR;
623 break;
624 case 'E':
625 *replace_flags = REPLACE_T_NO_TRANSFORM;
626 break;
627 default:
628 ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
629 break;
630 }
631 }
632 return ret;
633 }
634
635
636
637 static void
638 mc_search_regex__process_append_str (GString *dest_str, const char *from, gsize len,
639 replace_transform_type_t *replace_flags)
640 {
641 gsize loop;
642 gsize char_len;
643
644 if (len == (gsize) (-1))
645 len = strlen (from);
646
647 if (*replace_flags == REPLACE_T_NO_TRANSFORM)
648 {
649 g_string_append_len (dest_str, from, len);
650 return;
651 }
652
653 for (loop = 0; loop < len; loop += char_len)
654 {
655 GString *tmp_string = NULL;
656 GString *s;
657
658 s = mc_search__get_one_symbol (NULL, from + loop, len - loop, NULL);
659 char_len = s->len;
660
661 if ((*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR) != 0)
662 {
663 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR;
664 tmp_string = mc_search__toupper_case_str (NULL, s);
665 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
666 }
667 else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR) != 0)
668 {
669 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR;
670 tmp_string = mc_search__tolower_case_str (NULL, s);
671 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
672 }
673 else if ((*replace_flags & REPLACE_T_UPP_TRANSFORM) != 0)
674 {
675 tmp_string = mc_search__toupper_case_str (NULL, s);
676 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
677 }
678 else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM) != 0)
679 {
680 tmp_string = mc_search__tolower_case_str (NULL, s);
681 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
682 }
683
684 g_string_free (s, TRUE);
685 if (tmp_string != NULL)
686 g_string_free (tmp_string, TRUE);
687 }
688 }
689
690
691
692 static void
693 mc_search_regex__process_escape_sequence (GString *dest_str, const char *from, gsize len,
694 replace_transform_type_t *replace_flags, gboolean is_utf8)
695 {
696 gsize i = 0;
697 unsigned int c = 0;
698 char b;
699
700 if (len == (gsize) (-1))
701 len = strlen (from);
702 if (len == 0)
703 return;
704
705 if (from[i] == '{')
706 i++;
707 if (i >= len)
708 return;
709
710 if (from[i] == 'x')
711 {
712 i++;
713 if (i < len && from[i] == '{')
714 i++;
715 for (; i < len; i++)
716 {
717 if (from[i] >= '0' && from[i] <= '9')
718 c = c * 16 + from[i] - '0';
719 else if (from[i] >= 'a' && from[i] <= 'f')
720 c = c * 16 + 10 + from[i] - 'a';
721 else if (from[i] >= 'A' && from[i] <= 'F')
722 c = c * 16 + 10 + from[i] - 'A';
723 else
724 break;
725 }
726 }
727 else if (from[i] >= '0' && from[i] <= '7')
728 for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)
729 c = c * 8 + from[i] - '0';
730 else
731 {
732 switch (from[i])
733 {
734 case 'n':
735 c = '\n';
736 break;
737 case 't':
738 c = '\t';
739 break;
740 case 'v':
741 c = '\v';
742 break;
743 case 'b':
744 c = '\b';
745 break;
746 case 'r':
747 c = '\r';
748 break;
749 case 'f':
750 c = '\f';
751 break;
752 case 'a':
753 c = '\a';
754 break;
755 default:
756 mc_search_regex__process_append_str (dest_str, from, len, replace_flags);
757 return;
758 }
759 }
760
761 if (c < 0x80 || !is_utf8)
762 g_string_append_c (dest_str, (char) c);
763 else if (c < 0x800)
764 {
765 b = 0xC0 | (c >> 6);
766 g_string_append_c (dest_str, b);
767 b = 0x80 | (c & 0x3F);
768 g_string_append_c (dest_str, b);
769 }
770 else if (c < 0x10000)
771 {
772 b = 0xE0 | (c >> 12);
773 g_string_append_c (dest_str, b);
774 b = 0x80 | ((c >> 6) & 0x3F);
775 g_string_append_c (dest_str, b);
776 b = 0x80 | (c & 0x3F);
777 g_string_append_c (dest_str, b);
778 }
779 else if (c < 0x10FFFF)
780 {
781 b = 0xF0 | (c >> 16);
782 g_string_append_c (dest_str, b);
783 b = 0x80 | ((c >> 12) & 0x3F);
784 g_string_append_c (dest_str, b);
785 b = 0x80 | ((c >> 6) & 0x3F);
786 g_string_append_c (dest_str, b);
787 b = 0x80 | (c & 0x3F);
788 g_string_append_c (dest_str, b);
789 }
790 }
791
792
793
794
795
796 void
797 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t *lc_mc_search,
798 mc_search_cond_t *mc_search_cond)
799 {
800 if (lc_mc_search->whole_words && !lc_mc_search->is_entire_line)
801 {
802
803
804
805 g_string_prepend (mc_search_cond->str, "(?<![\\p{L}\\p{N}_])");
806 g_string_append (mc_search_cond->str, "(?![\\p{L}\\p{N}_])");
807 }
808
809 {
810 #ifdef SEARCH_TYPE_GLIB
811 GError *mcerror = NULL;
812 GRegexCompileFlags g_regex_options = G_REGEX_OPTIMIZE | G_REGEX_DOTALL;
813
814 if (str_isutf8 (charset) && mc_global.utf8_display)
815 {
816 if (!lc_mc_search->is_case_sensitive)
817 g_regex_options |= G_REGEX_CASELESS;
818 }
819 else
820 {
821 g_regex_options |= G_REGEX_RAW;
822
823 if (!lc_mc_search->is_case_sensitive)
824 {
825 GString *tmp;
826
827 tmp = mc_search_cond->str;
828 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
829 g_string_free (tmp, TRUE);
830 }
831 }
832
833 mc_search_cond->regex_handle =
834 g_regex_new (mc_search_cond->str->str, g_regex_options, 0, &mcerror);
835
836 if (mcerror != NULL)
837 {
838 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
839 g_free (lc_mc_search->error_str);
840 lc_mc_search->error_str =
841 str_conv_gerror_message (mcerror, _ ("Regular expression error"));
842 g_error_free (mcerror);
843 return;
844 }
845 #else
846
847 # ifdef HAVE_PCRE2
848 int errcode;
849 char error[BUF_SMALL] = "";
850 size_t erroffset;
851 int pcre_options = PCRE2_MULTILINE;
852 # else
853 const char *error;
854 int erroffset;
855 int pcre_options = PCRE_EXTRA | PCRE_MULTILINE;
856 # endif
857
858 if (str_isutf8 (charset) && mc_global.utf8_display)
859 {
860 # ifdef HAVE_PCRE2
861 pcre_options |= PCRE2_UTF;
862 if (!lc_mc_search->is_case_sensitive)
863 pcre_options |= PCRE2_CASELESS;
864 # else
865 pcre_options |= PCRE_UTF8;
866 if (!lc_mc_search->is_case_sensitive)
867 pcre_options |= PCRE_CASELESS;
868 # endif
869 }
870 else if (!lc_mc_search->is_case_sensitive)
871 {
872 GString *tmp;
873
874 tmp = mc_search_cond->str;
875 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
876 g_string_free (tmp, TRUE);
877 }
878
879 mc_search_cond->regex_handle =
880 # ifdef HAVE_PCRE2
881 pcre2_compile ((unsigned char *) mc_search_cond->str->str, PCRE2_ZERO_TERMINATED,
882 pcre_options, &errcode, &erroffset, NULL);
883 # else
884 pcre_compile (mc_search_cond->str->str, pcre_options, &error, &erroffset, NULL);
885 # endif
886 if (mc_search_cond->regex_handle == NULL)
887 {
888 # ifdef HAVE_PCRE2
889 pcre2_get_error_message (errcode, (unsigned char *) error, sizeof (error));
890 # endif
891 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_COMPILE, "%s", error);
892 return;
893 }
894 # ifdef HAVE_PCRE2
895 if (pcre2_jit_compile (mc_search_cond->regex_handle, PCRE2_JIT_COMPLETE) && *error != '\0')
896 # else
897 lc_mc_search->regex_match_info = pcre_study (mc_search_cond->regex_handle, 0, &error);
898 if (lc_mc_search->regex_match_info == NULL && error != NULL)
899 # endif
900 {
901 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_COMPILE, "%s", error);
902 MC_PTR_FREE (mc_search_cond->regex_handle);
903 return;
904 }
905 #endif
906 }
907
908 lc_mc_search->is_utf8 = str_isutf8 (charset);
909 }
910
911
912
913 gboolean
914 mc_search__run_regex (mc_search_t *lc_mc_search, const void *user_data, off_t start_search,
915 off_t end_search, gsize *found_len)
916 {
917 mc_search_cbret_t ret = MC_SEARCH_CB_NOTFOUND;
918 off_t current_pos, virtual_pos;
919 gint start_pos;
920 gint end_pos;
921
922 if (lc_mc_search->regex_buffer != NULL)
923 g_string_set_size (lc_mc_search->regex_buffer, 0);
924 else
925 lc_mc_search->regex_buffer = g_string_sized_new (64);
926
927 virtual_pos = current_pos = start_search;
928 while (virtual_pos <= end_search)
929 {
930 g_string_set_size (lc_mc_search->regex_buffer, 0);
931 lc_mc_search->start_buffer = current_pos;
932
933 if (lc_mc_search->search_fn != NULL)
934 {
935 while (TRUE)
936 {
937 int current_chr = '\n';
938
939 ret = lc_mc_search->search_fn (user_data, current_pos, ¤t_chr);
940
941 if (ret == MC_SEARCH_CB_ABORT)
942 break;
943
944 if (ret == MC_SEARCH_CB_INVALID)
945 continue;
946
947 current_pos++;
948
949 if (ret == MC_SEARCH_CB_SKIP)
950 continue;
951
952 virtual_pos++;
953
954 g_string_append_c (lc_mc_search->regex_buffer, (char) current_chr);
955
956 if ((char) current_chr == '\n' || virtual_pos > end_search)
957 break;
958 }
959 }
960 else
961 {
962
963
964
965
966 while (TRUE)
967 {
968 const char current_chr = ((const char *) user_data)[current_pos];
969
970 if (current_chr == '\0')
971 break;
972
973 current_pos++;
974
975 if (current_chr == '\n' || current_pos > end_search)
976 break;
977 }
978
979
980 g_string_append_len (lc_mc_search->regex_buffer, (const char *) user_data + virtual_pos,
981 current_pos - virtual_pos);
982 virtual_pos = current_pos;
983 }
984
985 switch (mc_search__regex_found_cond (lc_mc_search, lc_mc_search->regex_buffer))
986 {
987 case COND__FOUND_OK:
988 #ifdef SEARCH_TYPE_GLIB
989 g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
990 #else
991 start_pos = lc_mc_search->iovector[0];
992 end_pos = lc_mc_search->iovector[1];
993 #endif
994 if (found_len != NULL)
995 *found_len = end_pos - start_pos;
996 lc_mc_search->normal_offset = lc_mc_search->start_buffer + start_pos;
997 return TRUE;
998 case COND__NOT_ALL_FOUND:
999 break;
1000 default:
1001 g_string_free (lc_mc_search->regex_buffer, TRUE);
1002 lc_mc_search->regex_buffer = NULL;
1003 return FALSE;
1004 }
1005
1006 if ((lc_mc_search->update_fn != NULL)
1007 && ((lc_mc_search->update_fn) (user_data, current_pos) == MC_SEARCH_CB_ABORT))
1008 ret = MC_SEARCH_CB_ABORT;
1009
1010 if (ret == MC_SEARCH_CB_ABORT || ret == MC_SEARCH_CB_NOTFOUND)
1011 break;
1012 }
1013
1014 g_string_free (lc_mc_search->regex_buffer, TRUE);
1015 lc_mc_search->regex_buffer = NULL;
1016
1017 MC_PTR_FREE (lc_mc_search->error_str);
1018 lc_mc_search->error = ret == MC_SEARCH_CB_ABORT ? MC_SEARCH_E_ABORT : MC_SEARCH_E_NOTFOUND;
1019
1020 return FALSE;
1021 }
1022
1023
1024
1025 GString *
1026 mc_search_regex_prepare_replace_str (mc_search_t *lc_mc_search, GString *replace_str)
1027 {
1028 GString *ret;
1029
1030 int num_replace_tokens;
1031 gsize loop;
1032 gsize prev = 0;
1033 replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM;
1034
1035 num_replace_tokens =
1036 mc_search_regex__get_max_num_of_replace_tokens (replace_str->str, replace_str->len);
1037
1038 if (lc_mc_search->num_results < 0)
1039 return mc_g_string_dup (replace_str);
1040
1041 if (num_replace_tokens > lc_mc_search->num_results - 1
1042 || num_replace_tokens > MC_SEARCH__NUM_REPLACE_ARGS)
1043 {
1044 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE, "%s",
1045 _ (STR_E_RPL_NOT_EQ_TO_FOUND));
1046 return NULL;
1047 }
1048
1049 ret = g_string_sized_new (64);
1050
1051 for (loop = 0; loop < replace_str->len - 1; loop++)
1052 {
1053 int lc_index;
1054 gchar *tmp_str;
1055 gsize len = 0;
1056
1057 lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);
1058
1059 if (lc_index == REPLACE_PREPARE_T_NOTHING_SPECIAL)
1060 {
1061 if (len != 0)
1062 {
1063 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1064 &replace_flags);
1065 mc_search_regex__process_append_str (ret, replace_str->str + loop + 1, len - 1,
1066 &replace_flags);
1067 prev = loop + len;
1068 loop = prev - 1;
1069 }
1070
1071 continue;
1072 }
1073
1074 if (lc_index == REPLACE_PREPARE_T_REPLACE_FLAG)
1075 {
1076 if (loop != 0)
1077 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1078 &replace_flags);
1079 prev = loop + len;
1080 loop = prev - 1;
1081 continue;
1082 }
1083
1084
1085 if (lc_index == REPLACE_PREPARE_T_ESCAPE_SEQ)
1086 {
1087 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1088 &replace_flags);
1089
1090 mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1,
1091 &replace_flags, lc_mc_search->is_utf8);
1092 prev = loop + len;
1093 loop = prev - 1;
1094 continue;
1095 }
1096
1097
1098 if (lc_index > lc_mc_search->num_results)
1099 {
1100 g_string_free (ret, TRUE);
1101 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE,
1102 _ (STR_E_RPL_INVALID_TOKEN), lc_index);
1103 return NULL;
1104 }
1105
1106 tmp_str = mc_search_regex__get_token_by_num (lc_mc_search, lc_index);
1107
1108 if (loop != 0)
1109 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1110 &replace_flags);
1111
1112 mc_search_regex__process_append_str (ret, tmp_str, -1, &replace_flags);
1113 g_free (tmp_str);
1114
1115 prev = loop + len;
1116 loop = prev - 1;
1117 }
1118
1119 mc_search_regex__process_append_str (ret, replace_str->str + prev, replace_str->len - prev,
1120 &replace_flags);
1121
1122 return ret;
1123 }
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133 mc_search_line_t
1134 mc_search_get_line_type (const mc_search_t *search)
1135 {
1136 mc_search_line_t search_line_type = MC_SEARCH_LINE_NONE;
1137
1138 if (search->search_type == MC_SEARCH_T_REGEX)
1139 {
1140 if (search->original.str->str[0] == '^')
1141 search_line_type |= MC_SEARCH_LINE_BEGIN;
1142
1143 if (search->original.str->str[search->original.str->len - 1] == '$')
1144 search_line_type |= MC_SEARCH_LINE_END;
1145 }
1146
1147 return search_line_type;
1148 }
1149
1150