root/lib/search/hex.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. mc_search__hex_translate_to_regex
  2. mc_search__cond_struct_new_init_hex
  3. mc_search__run_hex
  4. mc_search_hex_prepare_replace_str

   1 /*
   2    Search text engine.
   3    HEX-style pattern matching
   4 
   5    Copyright (C) 2009-2024
   6    Free Software Foundation, Inc.
   7 
   8    Written by:
   9    Slava Zanko <slavazanko@gmail.com>, 2009.
  10 
  11    This file is part of the Midnight Commander.
  12 
  13    The Midnight Commander is free software: you can redistribute it
  14    and/or modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation, either version 3 of the License,
  16    or (at your option) any later version.
  17 
  18    The Midnight Commander is distributed in the hope that it will be useful,
  19    but WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21    GNU General Public License for more details.
  22 
  23    You should have received a copy of the GNU General Public License
  24    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  25  */
  26 
  27 #include <config.h>
  28 
  29 #include <stdio.h>
  30 
  31 #include "lib/global.h"
  32 #include "lib/strutil.h"
  33 #include "lib/search.h"
  34 #include "lib/strescape.h"
  35 
  36 #include "internal.h"
  37 
  38 /*** global variables ****************************************************************************/
  39 
  40 /*** file scope macro definitions ****************************************************************/
  41 
  42 typedef enum                    /* result codes of the hex-pattern parser in this file */
  43 {
  44     MC_SEARCH_HEX_E_OK,                 /* no error recorded (initial state) */
  45     MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE,   /* a parsed hex number was greater than 255 */
  46     MC_SEARCH_HEX_E_INVALID_CHARACTER,  /* token is neither whitespace, a hex number nor a '"' */
  47     MC_SEARCH_HEX_E_UNMATCHED_QUOTES    /* a quoted string has no closing '"' */
  48 } mc_search_hex_parse_error_t;
  49 
  50 /*** file scope type declarations ****************************************************************/
  51 
  52 /*** forward declarations (file scope functions) *************************************************/
  53 
  54 /*** file scope variables ************************************************************************/
  55 
  56 /* --------------------------------------------------------------------------------------------- */
  57 /*** file scope functions ************************************************************************/
  58 /* --------------------------------------------------------------------------------------------- */
  59 
  60 static GString *
  61 mc_search__hex_translate_to_regex (const GString * astr, mc_search_hex_parse_error_t * error_ptr,
     /* [previous][next][first][last][top][bottom][index][help]  */
  62                                    int *error_pos_ptr)
  63 {
  64     GString *buff;
  65     const char *str;
  66     gsize str_len;
  67     gsize loop = 0;
  68     mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
  69 
  70     buff = g_string_sized_new (64);
  71     str = astr->str;
  72     str_len = astr->len;
  73 
  74     while (loop < str_len && error == MC_SEARCH_HEX_E_OK)
  75     {
  76         unsigned int val;
  77         int ptr;
  78 
  79         if (g_ascii_isspace (str[loop]))
  80         {
  81             /* Eat-up whitespace between tokens. */
  82             while (g_ascii_isspace (str[loop]))
  83                 loop++;
  84         }
  85         /* cppcheck-suppress invalidscanf */
  86         else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1)
  87         {
  88             if (val > 255)
  89                 error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE;
  90             else
  91             {
  92                 g_string_append_printf (buff, "\\x%02X", val);
  93                 loop += ptr;
  94             }
  95         }
  96         else if (str[loop] == '"')
  97         {
  98             gsize loop2;
  99 
 100             loop2 = loop + 1;
 101 
 102             while (loop2 < str_len)
 103             {
 104                 if (str[loop2] == '"')
 105                     break;
 106                 if (str[loop2] == '\\' && loop2 + 1 < str_len)
 107                     loop2++;
 108                 g_string_append_c (buff, str[loop2]);
 109                 loop2++;
 110             }
 111 
 112             if (str[loop2] == '\0')
 113                 error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES;
 114             else
 115                 loop = loop2 + 1;
 116         }
 117         else
 118             error = MC_SEARCH_HEX_E_INVALID_CHARACTER;
 119     }
 120 
 121     if (error != MC_SEARCH_HEX_E_OK)
 122     {
 123         g_string_free (buff, TRUE);
 124         if (error_ptr != NULL)
 125             *error_ptr = error;
 126         if (error_pos_ptr != NULL)
 127             *error_pos_ptr = loop;
 128         return NULL;
 129     }
 130 
 131     return buff;
 132 }
 133 
 134 /* --------------------------------------------------------------------------------------------- */
 135 /*** public functions ****************************************************************************/
 136 /* --------------------------------------------------------------------------------------------- */
 137 
 138 void
 139 mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
     /* [previous][next][first][last][top][bottom][index][help]  */
 140                                      mc_search_cond_t * mc_search_cond)
 141 {
 142     GString *tmp;
 143     mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
 144     int error_pos = 0;
 145 
 146     /*
 147      * We may be searching in binary data, which is often invalid UTF-8.
 148      *
 149      * We have to create a non UTF-8 regex (that is, G_REGEX_RAW) or else, as
 150      * the data is invalid UTF-8, both GLib's PCRE and our
 151      * mc_search__g_regex_match_full_safe() are going to fail us. The former by
 152      * not finding all bytes, the latter by overwriting the supposedly invalid
 153      * UTF-8 with NULs.
 154      *
 155      * To do this, we specify "ASCII" as the charset.
 156      *
 157      * In fact, we can specify any charset other than "UTF-8": any such charset
 158      * will trigger G_REGEX_RAW (see [1]). The output of [2] will be the same
 159      * for all charsets because it skips the \xXX symbols
 160      * mc_search__hex_translate_to_regex() outputs.
 161      *
 162      * But "ASCII" is the best choice because a hex pattern may contain a
 163      * quoted string: this way we know [2] will ignore any characters outside
 164      * ASCII letters range (these ignored chars will be copied verbatim to the
 165      * output and will match as-is; in other words, in a case-sensitive manner;
 166      * If the user is interested in case-insensitive searches of international
 167      * text, he shouldn't be using hex search in the first place.)
 168      *
 169      * Switching out of UTF-8 has another advantage:
 170      *
 171      * When doing case-insensitive searches, GLib treats \xXX symbols as normal
 172      * letters and therefore matches both "a" and "A" for the hex pattern
 173      * "0x61". When we switch out of UTF-8, we're switching to using [2], which
 174      * doesn't have this issue.
 175      *
 176      * [1] mc_search__cond_struct_new_init_regex
 177      * [2] mc_search__cond_struct_new_regex_ci_str
 178      */
 179     if (str_isutf8 (charset))
 180         charset = "ASCII";
 181 
 182     tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos);
 183     if (tmp != NULL)
 184     {
 185         g_string_free (mc_search_cond->str, TRUE);
 186         mc_search_cond->str = tmp;
 187         mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
 188     }
 189     else
 190     {
 191         const char *desc;
 192 
 193         switch (error)
 194         {
 195         case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE:
 196             desc =
 197                 _
 198                 ("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)");
 199             break;
 200         case MC_SEARCH_HEX_E_INVALID_CHARACTER:
 201             desc = _("Invalid character");
 202             break;
 203         case MC_SEARCH_HEX_E_UNMATCHED_QUOTES:
 204             desc = _("Unmatched quotes character");
 205             break;
 206         default:
 207             desc = "";
 208         }
 209 
 210         lc_mc_search->error = MC_SEARCH_E_INPUT;
 211         lc_mc_search->error_str =
 212             g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc);
 213     }
 214 }
 215 
 216 /* --------------------------------------------------------------------------------------------- */
 217 
 218 gboolean
 219 mc_search__run_hex (mc_search_t * lc_mc_search, const void *user_data,
     /* [previous][next][first][last][top][bottom][index][help]  */
 220                     gsize start_search, gsize end_search, gsize * found_len)
 221 {
 222     return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
 223 }
 224 
 225 /* --------------------------------------------------------------------------------------------- */
 226 
 227 GString *
 228 mc_search_hex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
     /* [previous][next][first][last][top][bottom][index][help]  */
 229 {
 230     (void) lc_mc_search;
 231 
 232     return mc_g_string_dup (replace_str);
 233 }
 234 
 235 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */