This source file includes the following definitions.
- mc_search__regex_str_append_if_special
- mc_search__cond_struct_new_regex_hex_add
- mc_search__cond_struct_new_regex_accum_append
- mc_search__cond_struct_new_regex_ci_str
- mc_search__g_regex_match_full_safe
- mc_search__regex_found_cond_one
- mc_search__regex_found_cond
- mc_search_regex__get_max_num_of_replace_tokens
- mc_search_regex__get_token_by_num
- mc_search_regex__replace_handle_esc_seq
- mc_search_regex__process_replace_str
- mc_search_regex__process_append_str
- mc_search_regex__process_escape_sequence
- mc_search__cond_struct_new_init_regex
- mc_search__run_regex
- mc_search_regex_prepare_replace_str
- mc_search_get_line_type
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include <config.h>
30
31 #include <stdlib.h>
32
33 #include "lib/global.h"
34 #include "lib/strutil.h"
35 #include "lib/search.h"
36 #include "lib/util.h"
37
38 #include "internal.h"
39
40
41
42
43
/* Negative sentinels returned by mc_search_regex__process_replace_str();
   non-negative return values are capture-group numbers. */
#define REPLACE_PREPARE_T_NOTHING_SPECIAL (-1)
#define REPLACE_PREPARE_T_REPLACE_FLAG (-2)
#define REPLACE_PREPARE_T_ESCAPE_SEQ (-3)
47
48
49
/* Case-transformation flags applied while the replacement text is assembled.
   The values are distinct bits, so they can be OR-ed together and cleared
   individually. */
typedef enum
{
    REPLACE_T_NO_TRANSFORM = 0,            /* copy text unchanged (set by \E) */
    REPLACE_T_UPP_TRANSFORM_CHAR = 1 << 0, /* upper-case the next character only (\u) */
    REPLACE_T_LOW_TRANSFORM_CHAR = 1 << 1, /* lower-case the next character only (\l) */
    REPLACE_T_UPP_TRANSFORM = 1 << 2,      /* upper-case everything that follows (\U) */
    REPLACE_T_LOW_TRANSFORM = 1 << 3       /* lower-case everything that follows (\L) */
} replace_transform_type_t;
58
59
60
61
62
63
64
65
66
67 static gboolean
68 mc_search__regex_str_append_if_special (GString *copy_to, const GString *regex_str, gsize *offset)
69 {
70 const char *special_chars[] = {
71 "\\a",
72 "\\b",
73 "\\B",
74 "\\c",
75 "\\C",
76 "\\d",
77 "\\D",
78 "\\e",
79 "\\E",
80 "\\f",
81 "\\l",
82 "\\L",
83 "\\n",
84 "\\Q",
85 "\\r",
86 "\\s",
87 "\\S",
88 "\\t",
89 "\\u",
90 "\\U",
91 "\\w",
92 "\\W",
93 "\\x",
94 "\\X",
95 NULL,
96 };
97
98 char *tmp_regex_str;
99 const char **spec_chr;
100
101 tmp_regex_str = &(regex_str->str[*offset]);
102
103 for (spec_chr = special_chars; *spec_chr != NULL; spec_chr++)
104 {
105 gsize spec_chr_len;
106
107 spec_chr_len = strlen (*spec_chr);
108
109 if (strncmp (tmp_regex_str, *spec_chr, spec_chr_len) == 0
110 && !str_is_char_escaped (regex_str->str, tmp_regex_str))
111 {
112 if (strncmp ("\\x", *spec_chr, spec_chr_len) == 0)
113 {
114 if (tmp_regex_str[spec_chr_len] != '{')
115 spec_chr_len += 2;
116 else
117 {
118 while ((spec_chr_len < regex_str->len - *offset)
119 && tmp_regex_str[spec_chr_len] != '}')
120 spec_chr_len++;
121 if (tmp_regex_str[spec_chr_len] == '}')
122 spec_chr_len++;
123 }
124 }
125 g_string_append_len (copy_to, tmp_regex_str, spec_chr_len);
126 *offset += spec_chr_len;
127 return TRUE;
128 }
129 }
130
131 return FALSE;
132 }
133
134
135
136 static void
137 mc_search__cond_struct_new_regex_hex_add (const char *charset, GString *str_to,
138 const GString *one_char)
139 {
140 GString *upp, *low;
141 gsize loop;
142
143 upp = mc_search__toupper_case_str (charset, one_char);
144 low = mc_search__tolower_case_str (charset, one_char);
145
146 for (loop = 0; loop < upp->len; loop++)
147 {
148 gchar tmp_str[10 + 1];
149 gint tmp_len;
150
151 if (loop >= low->len || upp->str[loop] == low->str[loop])
152 tmp_len =
153 g_snprintf (tmp_str, sizeof (tmp_str), "\\x%02X", (unsigned char) upp->str[loop]);
154 else
155 tmp_len = g_snprintf (tmp_str, sizeof (tmp_str), "[\\x%02X\\x%02X]",
156 (unsigned char) upp->str[loop], (unsigned char) low->str[loop]);
157
158 g_string_append_len (str_to, tmp_str, tmp_len);
159 }
160
161 g_string_free (upp, TRUE);
162 g_string_free (low, TRUE);
163 }
164
165
166
167 static void
168 mc_search__cond_struct_new_regex_accum_append (const char *charset, GString *str_to,
169 GString *str_from)
170 {
171 GString *recoded_part;
172 gsize loop = 0;
173
174 recoded_part = g_string_sized_new (32);
175
176 while (loop < str_from->len)
177 {
178 GString *one_char;
179 gboolean just_letters;
180
181 one_char = mc_search__get_one_symbol (charset, str_from->str + loop,
182 MIN (str_from->len - loop, 6), &just_letters);
183
184 if (one_char->len == 0)
185 loop++;
186 else
187 {
188 loop += one_char->len;
189
190 if (just_letters)
191 mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char);
192 else
193 g_string_append_len (recoded_part, one_char->str, one_char->len);
194 }
195
196 g_string_free (one_char, TRUE);
197 }
198
199 g_string_append_len (str_to, recoded_part->str, recoded_part->len);
200 g_string_free (recoded_part, TRUE);
201 g_string_set_size (str_from, 0);
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 static GString *
220 mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString *astr)
221 {
222 GString *accumulator, *spec_char, *ret_str;
223 gsize loop;
224
225 ret_str = g_string_sized_new (64);
226 accumulator = g_string_sized_new (64);
227 spec_char = g_string_sized_new (64);
228 loop = 0;
229
230 while (loop < astr->len)
231 {
232 if (mc_search__regex_str_append_if_special (spec_char, astr, &loop))
233 {
234 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
235 g_string_append_len (ret_str, spec_char->str, spec_char->len);
236 g_string_set_size (spec_char, 0);
237 continue;
238 }
239
240 if (astr->str[loop] == '[' && !str_is_char_escaped (astr->str, &(astr->str[loop])))
241 {
242 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
243
244 while (loop < astr->len
245 && !(astr->str[loop] == ']'
246 && !str_is_char_escaped (astr->str, &(astr->str[loop]))))
247 {
248 g_string_append_c (ret_str, astr->str[loop]);
249 loop++;
250 }
251
252 g_string_append_c (ret_str, astr->str[loop]);
253 loop++;
254 continue;
255 }
256
257
258
259 g_string_append_c (accumulator, astr->str[loop]);
260 loop++;
261 }
262 mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
263
264 g_string_free (accumulator, TRUE);
265 g_string_free (spec_char, TRUE);
266
267 return ret_str;
268 }
269
270
271
272
273
274
275
276 static gboolean
277 mc_search__g_regex_match_full_safe (const GRegex *regex, const gchar *string, gssize string_len,
278 gint start_position, GRegexMatchFlags match_options,
279 GMatchInfo **match_info, GError **error)
280 {
281 char *string_safe, *p, *end;
282 gboolean ret;
283
284 if (string_len < 0)
285 string_len = strlen (string);
286
287 if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW)
288 || g_utf8_validate (string, string_len, NULL))
289 {
290 return g_regex_match_full (regex, string, string_len, start_position, match_options,
291 match_info, error);
292 }
293
294
295 p = string_safe = g_malloc (string_len + 1);
296 memcpy (string_safe, string, string_len);
297 string_safe[string_len] = '\0';
298 end = p + string_len;
299
300 while (p < end)
301 {
302 gunichar c = g_utf8_get_char_validated (p, -1);
303 if (c != (gunichar) (-1) && c != (gunichar) (-2))
304 {
305 p = g_utf8_next_char (p);
306 }
307 else
308 {
309
310
311
312 *p++ = '\0';
313 }
314 }
315
316 ret = g_regex_match_full (regex, string_safe, string_len, start_position, match_options,
317 match_info, error);
318 g_free (string_safe);
319 return ret;
320 }
321
322
323
324 static mc_search__found_cond_t
325 mc_search__regex_found_cond_one (mc_search_t *lc_mc_search, GRegex *regex, GString *search_str)
326 {
327 GError *mcerror = NULL;
328
329 if (!mc_search__g_regex_match_full_safe (regex, search_str->str, search_str->len, 0,
330 G_REGEX_MATCH_NEWLINE_ANY,
331 &lc_mc_search->regex_match_info, &mcerror))
332 {
333 g_match_info_free (lc_mc_search->regex_match_info);
334 lc_mc_search->regex_match_info = NULL;
335 if (mcerror != NULL)
336 {
337 lc_mc_search->error = MC_SEARCH_E_REGEX;
338 g_free (lc_mc_search->error_str);
339 lc_mc_search->error_str =
340 str_conv_gerror_message (mcerror, _ ("Regular expression error"));
341 g_error_free (mcerror);
342 return COND__FOUND_ERROR;
343 }
344 return COND__NOT_FOUND;
345 }
346 lc_mc_search->num_results = g_match_info_get_match_count (lc_mc_search->regex_match_info);
347
348 return COND__FOUND_OK;
349 }
350
351
352
353 static mc_search__found_cond_t
354 mc_search__regex_found_cond (mc_search_t *lc_mc_search, GString *search_str)
355 {
356 gsize loop1;
357
358 for (loop1 = 0; loop1 < lc_mc_search->prepared.conditions->len; loop1++)
359 {
360 mc_search_cond_t *mc_search_cond;
361 mc_search__found_cond_t ret;
362
363 mc_search_cond =
364 (mc_search_cond_t *) g_ptr_array_index (lc_mc_search->prepared.conditions, loop1);
365
366 if (!mc_search_cond->regex_handle)
367 continue;
368
369 ret = mc_search__regex_found_cond_one (lc_mc_search, mc_search_cond->regex_handle,
370 search_str);
371 if (ret != COND__NOT_FOUND)
372 return ret;
373 }
374 return COND__NOT_ALL_FOUND;
375 }
376
377
378
379 static int
380 mc_search_regex__get_max_num_of_replace_tokens (const gchar *str, gsize len)
381 {
382 int max_token = 0;
383 gsize loop;
384
385 for (loop = 0; loop < len - 1; loop++)
386 if (str[loop] == '\\' && g_ascii_isdigit (str[loop + 1]))
387 {
388 if (str_is_char_escaped (str, &str[loop]))
389 continue;
390 if (max_token < str[loop + 1] - '0')
391 max_token = str[loop + 1] - '0';
392 }
393 else if (str[loop] == '$' && str[loop + 1] == '{')
394 {
395 gsize tmp_len;
396
397 if (str_is_char_escaped (str, &str[loop]))
398 continue;
399
400 for (tmp_len = 0;
401 loop + tmp_len + 2 < len && (str[loop + 2 + tmp_len] & (char) 0xf0) == 0x30;
402 tmp_len++)
403 ;
404
405 if (str[loop + 2 + tmp_len] == '}')
406 {
407 int tmp_token;
408 char *tmp_str;
409
410 tmp_str = g_strndup (&str[loop + 2], tmp_len);
411 tmp_token = atoi (tmp_str);
412 if (max_token < tmp_token)
413 max_token = tmp_token;
414 g_free (tmp_str);
415 }
416 }
417
418 return max_token;
419 }
420
421
422
423 static char *
424 mc_search_regex__get_token_by_num (const mc_search_t *lc_mc_search, gsize lc_index)
425 {
426 int fnd_start = 0, fnd_end = 0;
427
428 g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &fnd_start, &fnd_end);
429
430 if (fnd_end == fnd_start)
431 return g_strdup ("");
432
433 return g_strndup (lc_mc_search->regex_buffer->str + fnd_start, fnd_end - fnd_start);
434 }
435
436
437
438 static gboolean
439 mc_search_regex__replace_handle_esc_seq (const GString *replace_str, const gsize current_pos,
440 gsize *skip_len, int *ret)
441 {
442 char *curr_str = &(replace_str->str[current_pos]);
443 char c = curr_str[1];
444
445 if (replace_str->len > current_pos + 2)
446 {
447 if (c == '{')
448 {
449 for (*skip_len = 2;
450 current_pos + *skip_len < replace_str->len && curr_str[*skip_len] >= '0'
451 && curr_str[*skip_len] <= '7';
452 (*skip_len)++)
453 ;
454
455 if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}')
456 {
457 (*skip_len)++;
458 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
459 return FALSE;
460 }
461 else
462 {
463 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
464 return TRUE;
465 }
466 }
467
468 if (c == 'x')
469 {
470 *skip_len = 2;
471 c = curr_str[2];
472 if (c == '{')
473 {
474 for (*skip_len = 3;
475 current_pos + *skip_len < replace_str->len
476 && g_ascii_isxdigit ((guchar) curr_str[*skip_len]);
477 (*skip_len)++)
478 ;
479
480 if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}')
481 {
482 (*skip_len)++;
483 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
484 return FALSE;
485 }
486 else
487 {
488 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
489 return TRUE;
490 }
491 }
492 else if (!g_ascii_isxdigit ((guchar) c))
493 {
494 *skip_len = 2;
495 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
496 return FALSE;
497 }
498 else
499 {
500 c = curr_str[3];
501 if (!g_ascii_isxdigit ((guchar) c))
502 *skip_len = 3;
503 else
504 *skip_len = 4;
505 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
506 return FALSE;
507 }
508 }
509 }
510
511 if (strchr ("ntvbrfa", c) != NULL)
512 {
513 *skip_len = 2;
514 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ;
515 return FALSE;
516 }
517 return TRUE;
518 }
519
520
521
522 static int
523 mc_search_regex__process_replace_str (const GString *replace_str, const gsize current_pos,
524 gsize *skip_len, replace_transform_type_t *replace_flags)
525 {
526 int ret = -1;
527 const char *curr_str = &(replace_str->str[current_pos]);
528
529 if (current_pos > replace_str->len)
530 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
531
532 *skip_len = 0;
533
534 if (replace_str->len > current_pos + 2 && curr_str[0] == '$' && curr_str[1] == '{'
535 && (curr_str[2] & (char) 0xf0) == 0x30)
536 {
537 char *tmp_str;
538
539 if (str_is_char_escaped (replace_str->str, curr_str))
540 {
541 *skip_len = 1;
542 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
543 }
544
545 for (*skip_len = 0; current_pos + *skip_len + 2 < replace_str->len
546 && (curr_str[2 + *skip_len] & (char) 0xf0) == 0x30;
547 (*skip_len)++)
548 ;
549
550 if (curr_str[2 + *skip_len] != '}')
551 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
552
553 tmp_str = g_strndup (curr_str + 2, *skip_len);
554 if (tmp_str == NULL)
555 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
556
557 ret = atoi (tmp_str);
558 g_free (tmp_str);
559
560 *skip_len += 3;
561 return ret;
562 }
563
564 if (curr_str[0] == '\\' && replace_str->len > current_pos + 1)
565 {
566 if (str_is_char_escaped (replace_str->str, curr_str))
567 {
568 *skip_len = 1;
569 return REPLACE_PREPARE_T_NOTHING_SPECIAL;
570 }
571
572 if (g_ascii_isdigit (curr_str[1]))
573 {
574 ret = g_ascii_digit_value (curr_str[1]);
575 *skip_len = 2;
576 return ret;
577 }
578
579 if (!mc_search_regex__replace_handle_esc_seq (replace_str, current_pos, skip_len, &ret))
580 return ret;
581
582 ret = REPLACE_PREPARE_T_REPLACE_FLAG;
583 *skip_len += 2;
584
585 switch (curr_str[1])
586 {
587 case 'U':
588 *replace_flags |= REPLACE_T_UPP_TRANSFORM;
589 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM;
590 break;
591 case 'u':
592 *replace_flags |= REPLACE_T_UPP_TRANSFORM_CHAR;
593 break;
594 case 'L':
595 *replace_flags |= REPLACE_T_LOW_TRANSFORM;
596 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM;
597 break;
598 case 'l':
599 *replace_flags |= REPLACE_T_LOW_TRANSFORM_CHAR;
600 break;
601 case 'E':
602 *replace_flags = REPLACE_T_NO_TRANSFORM;
603 break;
604 default:
605 ret = REPLACE_PREPARE_T_NOTHING_SPECIAL;
606 break;
607 }
608 }
609 return ret;
610 }
611
612
613
614 static void
615 mc_search_regex__process_append_str (GString *dest_str, const char *from, gsize len,
616 replace_transform_type_t *replace_flags)
617 {
618 gsize loop;
619 gsize char_len;
620
621 if (len == (gsize) (-1))
622 len = strlen (from);
623
624 if (*replace_flags == REPLACE_T_NO_TRANSFORM)
625 {
626 g_string_append_len (dest_str, from, len);
627 return;
628 }
629
630 for (loop = 0; loop < len; loop += char_len)
631 {
632 GString *tmp_string = NULL;
633 GString *s;
634
635 s = mc_search__get_one_symbol (NULL, from + loop, len - loop, NULL);
636 char_len = s->len;
637
638 if ((*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR) != 0)
639 {
640 *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR;
641 tmp_string = mc_search__toupper_case_str (NULL, s);
642 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
643 }
644 else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR) != 0)
645 {
646 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR;
647 tmp_string = mc_search__tolower_case_str (NULL, s);
648 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
649 }
650 else if ((*replace_flags & REPLACE_T_UPP_TRANSFORM) != 0)
651 {
652 tmp_string = mc_search__toupper_case_str (NULL, s);
653 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
654 }
655 else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM) != 0)
656 {
657 tmp_string = mc_search__tolower_case_str (NULL, s);
658 g_string_append_len (dest_str, tmp_string->str, tmp_string->len);
659 }
660
661 g_string_free (s, TRUE);
662 if (tmp_string != NULL)
663 g_string_free (tmp_string, TRUE);
664 }
665 }
666
667
668
669 static void
670 mc_search_regex__process_escape_sequence (GString *dest_str, const char *from, gsize len,
671 replace_transform_type_t *replace_flags, gboolean is_utf8)
672 {
673 gsize i = 0;
674 unsigned int c = 0;
675 char b;
676
677 if (len == (gsize) (-1))
678 len = strlen (from);
679 if (len == 0)
680 return;
681
682 if (from[i] == '{')
683 i++;
684 if (i >= len)
685 return;
686
687 if (from[i] == 'x')
688 {
689 i++;
690 if (i < len && from[i] == '{')
691 i++;
692 for (; i < len; i++)
693 {
694 if (from[i] >= '0' && from[i] <= '9')
695 c = c * 16 + from[i] - '0';
696 else if (from[i] >= 'a' && from[i] <= 'f')
697 c = c * 16 + 10 + from[i] - 'a';
698 else if (from[i] >= 'A' && from[i] <= 'F')
699 c = c * 16 + 10 + from[i] - 'A';
700 else
701 break;
702 }
703 }
704 else if (from[i] >= '0' && from[i] <= '7')
705 for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)
706 c = c * 8 + from[i] - '0';
707 else
708 {
709 switch (from[i])
710 {
711 case 'n':
712 c = '\n';
713 break;
714 case 't':
715 c = '\t';
716 break;
717 case 'v':
718 c = '\v';
719 break;
720 case 'b':
721 c = '\b';
722 break;
723 case 'r':
724 c = '\r';
725 break;
726 case 'f':
727 c = '\f';
728 break;
729 case 'a':
730 c = '\a';
731 break;
732 default:
733 mc_search_regex__process_append_str (dest_str, from, len, replace_flags);
734 return;
735 }
736 }
737
738 if (c < 0x80 || !is_utf8)
739 g_string_append_c (dest_str, (char) c);
740 else if (c < 0x800)
741 {
742 b = 0xC0 | (c >> 6);
743 g_string_append_c (dest_str, b);
744 b = 0x80 | (c & 0x3F);
745 g_string_append_c (dest_str, b);
746 }
747 else if (c < 0x10000)
748 {
749 b = 0xE0 | (c >> 12);
750 g_string_append_c (dest_str, b);
751 b = 0x80 | ((c >> 6) & 0x3F);
752 g_string_append_c (dest_str, b);
753 b = 0x80 | (c & 0x3F);
754 g_string_append_c (dest_str, b);
755 }
756 else if (c < 0x10FFFF)
757 {
758 b = 0xF0 | (c >> 16);
759 g_string_append_c (dest_str, b);
760 b = 0x80 | ((c >> 12) & 0x3F);
761 g_string_append_c (dest_str, b);
762 b = 0x80 | ((c >> 6) & 0x3F);
763 g_string_append_c (dest_str, b);
764 b = 0x80 | (c & 0x3F);
765 g_string_append_c (dest_str, b);
766 }
767 }
768
769
770
771
772
773 void
774 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t *lc_mc_search,
775 mc_search_cond_t *mc_search_cond)
776 {
777 if (lc_mc_search->whole_words && !lc_mc_search->is_entire_line)
778 {
779
780
781
782 g_string_prepend (mc_search_cond->str, "(?<![\\p{L}\\p{N}_])");
783 g_string_append (mc_search_cond->str, "(?![\\p{L}\\p{N}_])");
784 }
785
786 {
787 GError *mcerror = NULL;
788 GRegexCompileFlags g_regex_options = G_REGEX_OPTIMIZE | G_REGEX_DOTALL;
789
790 if (str_isutf8 (charset) && mc_global.utf8_display)
791 {
792 if (!lc_mc_search->is_case_sensitive)
793 g_regex_options |= G_REGEX_CASELESS;
794 }
795 else
796 {
797 g_regex_options |= G_REGEX_RAW;
798
799 if (!lc_mc_search->is_case_sensitive)
800 {
801 GString *tmp;
802
803 tmp = mc_search_cond->str;
804 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
805 g_string_free (tmp, TRUE);
806 }
807 }
808
809 mc_search_cond->regex_handle =
810 g_regex_new (mc_search_cond->str->str, g_regex_options, 0, &mcerror);
811
812 if (mcerror != NULL)
813 {
814 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
815 g_free (lc_mc_search->error_str);
816 lc_mc_search->error_str =
817 str_conv_gerror_message (mcerror, _ ("Regular expression error"));
818 g_error_free (mcerror);
819 return;
820 }
821 }
822
823 lc_mc_search->is_utf8 = str_isutf8 (charset);
824 }
825
826
827
828 gboolean
829 mc_search__run_regex (mc_search_t *lc_mc_search, const void *user_data, off_t start_search,
830 off_t end_search, gsize *found_len)
831 {
832 mc_search_cbret_t ret = MC_SEARCH_CB_NOTFOUND;
833 off_t current_pos, virtual_pos;
834
835 if (lc_mc_search->regex_buffer != NULL)
836 g_string_set_size (lc_mc_search->regex_buffer, 0);
837 else
838 lc_mc_search->regex_buffer = g_string_sized_new (64);
839
840 virtual_pos = current_pos = start_search;
841 while (virtual_pos <= end_search)
842 {
843 g_string_set_size (lc_mc_search->regex_buffer, 0);
844 lc_mc_search->start_buffer = current_pos;
845
846 if (lc_mc_search->search_fn != NULL)
847 {
848 while (TRUE)
849 {
850 int current_chr = '\n';
851
852 ret = lc_mc_search->search_fn (user_data, current_pos, ¤t_chr);
853
854 if (ret == MC_SEARCH_CB_ABORT)
855 break;
856
857 if (ret == MC_SEARCH_CB_INVALID)
858 continue;
859
860 current_pos++;
861
862 if (ret == MC_SEARCH_CB_SKIP)
863 continue;
864
865 virtual_pos++;
866
867 g_string_append_c (lc_mc_search->regex_buffer, (char) current_chr);
868
869 if ((char) current_chr == '\n' || virtual_pos > end_search)
870 break;
871 }
872 }
873 else
874 {
875
876
877
878
879 while (TRUE)
880 {
881 const char current_chr = ((const char *) user_data)[current_pos];
882
883 if (current_chr == '\0')
884 break;
885
886 current_pos++;
887
888 if (current_chr == '\n' || current_pos > end_search)
889 break;
890 }
891
892
893 g_string_append_len (lc_mc_search->regex_buffer, (const char *) user_data + virtual_pos,
894 current_pos - virtual_pos);
895 virtual_pos = current_pos;
896 }
897
898 switch (mc_search__regex_found_cond (lc_mc_search, lc_mc_search->regex_buffer))
899 {
900 case COND__FOUND_OK:
901 {
902 gint start_pos, end_pos;
903
904 g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
905 if (found_len != NULL)
906 *found_len = end_pos - start_pos;
907 lc_mc_search->normal_offset = lc_mc_search->start_buffer + start_pos;
908 return TRUE;
909 }
910 case COND__NOT_ALL_FOUND:
911 break;
912 default:
913 g_string_free (lc_mc_search->regex_buffer, TRUE);
914 lc_mc_search->regex_buffer = NULL;
915 return FALSE;
916 }
917
918 if ((lc_mc_search->update_fn != NULL)
919 && ((lc_mc_search->update_fn) (user_data, current_pos) == MC_SEARCH_CB_ABORT))
920 ret = MC_SEARCH_CB_ABORT;
921
922 if (ret == MC_SEARCH_CB_ABORT || ret == MC_SEARCH_CB_NOTFOUND)
923 break;
924 }
925
926 g_string_free (lc_mc_search->regex_buffer, TRUE);
927 lc_mc_search->regex_buffer = NULL;
928
929 MC_PTR_FREE (lc_mc_search->error_str);
930 lc_mc_search->error = ret == MC_SEARCH_CB_ABORT ? MC_SEARCH_E_ABORT : MC_SEARCH_E_NOTFOUND;
931
932 return FALSE;
933 }
934
935
936
937 GString *
938 mc_search_regex_prepare_replace_str (mc_search_t *lc_mc_search, GString *replace_str)
939 {
940 GString *ret;
941
942 int num_replace_tokens;
943 gsize loop;
944 gsize prev = 0;
945 replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM;
946
947 num_replace_tokens =
948 mc_search_regex__get_max_num_of_replace_tokens (replace_str->str, replace_str->len);
949
950 if (lc_mc_search->num_results < 0)
951 return mc_g_string_dup (replace_str);
952
953 if (num_replace_tokens > lc_mc_search->num_results - 1
954 || num_replace_tokens > MC_SEARCH__NUM_REPLACE_ARGS)
955 {
956 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE, "%s",
957 _ (STR_E_RPL_NOT_EQ_TO_FOUND));
958 return NULL;
959 }
960
961 ret = g_string_sized_new (64);
962
963 for (loop = 0; loop < replace_str->len - 1; loop++)
964 {
965 int lc_index;
966 gchar *tmp_str;
967 gsize len = 0;
968
969 lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);
970
971 if (lc_index == REPLACE_PREPARE_T_NOTHING_SPECIAL)
972 {
973 if (len != 0)
974 {
975 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
976 &replace_flags);
977 mc_search_regex__process_append_str (ret, replace_str->str + loop + 1, len - 1,
978 &replace_flags);
979 prev = loop + len;
980 loop = prev - 1;
981 }
982
983 continue;
984 }
985
986 if (lc_index == REPLACE_PREPARE_T_REPLACE_FLAG)
987 {
988 if (loop != 0)
989 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
990 &replace_flags);
991 prev = loop + len;
992 loop = prev - 1;
993 continue;
994 }
995
996
997 if (lc_index == REPLACE_PREPARE_T_ESCAPE_SEQ)
998 {
999 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1000 &replace_flags);
1001
1002 mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1,
1003 &replace_flags, lc_mc_search->is_utf8);
1004 prev = loop + len;
1005 loop = prev - 1;
1006 continue;
1007 }
1008
1009
1010 if (lc_index > lc_mc_search->num_results)
1011 {
1012 g_string_free (ret, TRUE);
1013 mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE,
1014 _ (STR_E_RPL_INVALID_TOKEN), lc_index);
1015 return NULL;
1016 }
1017
1018 tmp_str = mc_search_regex__get_token_by_num (lc_mc_search, lc_index);
1019
1020 if (loop != 0)
1021 mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev,
1022 &replace_flags);
1023
1024 mc_search_regex__process_append_str (ret, tmp_str, -1, &replace_flags);
1025 g_free (tmp_str);
1026
1027 prev = loop + len;
1028 loop = prev - 1;
1029 }
1030
1031 mc_search_regex__process_append_str (ret, replace_str->str + prev, replace_str->len - prev,
1032 &replace_flags);
1033
1034 return ret;
1035 }
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045 mc_search_line_t
1046 mc_search_get_line_type (const mc_search_t *search)
1047 {
1048 mc_search_line_t search_line_type = MC_SEARCH_LINE_NONE;
1049
1050 if (search->search_type == MC_SEARCH_T_REGEX)
1051 {
1052 if (search->original.str->str[0] == '^')
1053 search_line_type |= MC_SEARCH_LINE_BEGIN;
1054
1055 if (search->original.str->str[search->original.str->len - 1] == '$')
1056 search_line_type |= MC_SEARCH_LINE_END;
1057 }
1058
1059 return search_line_type;
1060 }
1061
1062