src/vfs/tar/tar-internal.c

src/vfs/tar/tar-internal.c
/* */
This source file includes the following definitions.
tar_short_read
tar_flush_read
tar_flush_archive
tar_seek_archive
is_octal_digit
tar_assign_string
tar_assign_string_dup
tar_assign_string_dup_n
stoint
tar_from_header
off_from_header
tar_find_next_block
tar_set_next_block_after
tar_current_block_ordinal
tar_skip_file
   1 /*
   2    Virtual File System: GNU Tar file system.
   3 
   4    Copyright (C) 2023-2025
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Andrew Borodin <aborodin@vmail.ru>, 2023
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25 
  26 /**
  27  * \file
  28  * \brief Source: Virtual File System: GNU Tar file system
  29  * \author Andrew Borodin
  30  * \date 2022
  31  */
  32 
  33 #include <config.h>
  34 
  35 #include <ctype.h>     // isdigit()
  36 #include <inttypes.h>  // uintmax_t
  37 
  38 #include "lib/global.h"
  39 #include "lib/widget.h"   // message()
  40 #include "lib/vfs/vfs.h"  // mc_read()
  41 
  42 #include "tar-internal.h"
  43 
  44 /*** global variables ****************************************************************************/
  45 
  46 /*** file scope macro definitions ****************************************************************/
  47 
  48 #ifndef UINTMAX_WIDTH
  49 #    define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
  50 #endif
  51 
  52 /* Log base 2 of common values. */
  53 #define LG_256 8
  54 
  55 /*** file scope type declarations ****************************************************************/
  56 
  57 /*** forward declarations (file scope functions) *************************************************/
  58 
  59 /*** file scope variables ************************************************************************/
  60 
  61 /* Table of base-64 digit values + 1, indexed by unsigned chars.
  62    See Internet RFC 2045 Table 1.
  63    Zero entries are for unsigned chars that are not base-64 digits.  */
  64 static char const base64_map[UCHAR_MAX + 1] = {
  65     ['A'] = 0 + 1,  ['B'] = 1 + 1,  ['C'] = 2 + 1,  ['D'] = 3 + 1,  ['E'] = 4 + 1,  ['F'] = 5 + 1,
  66     ['G'] = 6 + 1,  ['H'] = 7 + 1,  ['I'] = 8 + 1,  ['J'] = 9 + 1,  ['K'] = 10 + 1, ['L'] = 11 + 1,
  67     ['M'] = 12 + 1, ['N'] = 13 + 1, ['O'] = 14 + 1, ['P'] = 15 + 1, ['Q'] = 16 + 1, ['R'] = 17 + 1,
  68     ['S'] = 18 + 1, ['T'] = 19 + 1, ['U'] = 20 + 1, ['V'] = 21 + 1, ['W'] = 22 + 1, ['X'] = 23 + 1,
  69     ['Y'] = 24 + 1, ['Z'] = 25 + 1, ['a'] = 26 + 1, ['b'] = 27 + 1, ['c'] = 28 + 1, ['d'] = 29 + 1,
  70     ['e'] = 30 + 1, ['f'] = 31 + 1, ['g'] = 32 + 1, ['h'] = 33 + 1, ['i'] = 34 + 1, ['j'] = 35 + 1,
  71     ['k'] = 36 + 1, ['l'] = 37 + 1, ['m'] = 38 + 1, ['n'] = 39 + 1, ['o'] = 40 + 1, ['p'] = 41 + 1,
  72     ['q'] = 42 + 1, ['r'] = 43 + 1, ['s'] = 44 + 1, ['t'] = 45 + 1, ['u'] = 46 + 1, ['v'] = 47 + 1,
  73     ['w'] = 48 + 1, ['x'] = 49 + 1, ['y'] = 50 + 1, ['z'] = 51 + 1, ['0'] = 52 + 1, ['1'] = 53 + 1,
  74     ['2'] = 54 + 1, ['3'] = 55 + 1, ['4'] = 56 + 1, ['5'] = 57 + 1, ['6'] = 58 + 1, ['7'] = 59 + 1,
  75     ['8'] = 60 + 1, ['9'] = 61 + 1, ['+'] = 62 + 1, ['/'] = 63 + 1,
  76 };
  77 
  78 /* --------------------------------------------------------------------------------------------- */
  79 /*** file scope functions ************************************************************************/
  80 /* --------------------------------------------------------------------------------------------- */
  81 
  82 static gboolean
  83 tar_short_read (size_t status, tar_super_t *archive)
     /*   */
  84 {
  85     size_t left;  // bytes left
  86     char *more;   // pointer to next byte to read
  87 
  88     more = archive->record_start->buffer + status;
  89     left = record_size - status;
  90 
  91     while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
  92     {
  93         if (status != 0)
  94         {
  95             ssize_t r;
  96 
  97             r = mc_read (archive->fd, more, left);
  98             if (r == -1)
  99                 return FALSE;
 100 
 101             status = (size_t) r;
 102         }
 103 
 104         if (status == 0)
 105             break;
 106 
 107         left -= status;
 108         more += status;
 109     }
 110 
 111     record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
 112 
 113     return TRUE;
 114 }
 115 
 116 /* --------------------------------------------------------------------------------------------- */
 117 
 118 static gboolean
 119 tar_flush_read (tar_super_t *archive)
     /*   */
 120 {
 121     size_t status;
 122 
 123     status = mc_read (archive->fd, archive->record_start->buffer, record_size);
 124     if ((idx_t) status == record_size)
 125         return TRUE;
 126 
 127     return tar_short_read (status, archive);
 128 }
 129 
 130 /* --------------------------------------------------------------------------------------------- */
 131 
 132 /**  Flush the current buffer from the archive.
 133  */
 134 static gboolean
 135 tar_flush_archive (tar_super_t *archive)
     /*   */
 136 {
 137     record_start_block += record_end - archive->record_start;
 138     current_block = archive->record_start;
 139     record_end = archive->record_start + blocking_factor;
 140 
 141     return tar_flush_read (archive);
 142 }
 143 
 144 /* --------------------------------------------------------------------------------------------- */
 145 
 146 static off_t
 147 tar_seek_archive (tar_super_t *archive, off_t size)
     /*   */
 148 {
 149     off_t start, offset;
 150     off_t nrec, nblk;
 151     off_t skipped;
 152 
 153     // If low level I/O is already at EOF, do not try to seek further.
 154     if (record_end < archive->record_start + blocking_factor)
 155         return 0;
 156 
 157     skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
 158     if (size <= skipped)
 159         return 0;
 160 
 161     // Compute number of records to skip
 162     nrec = (size - skipped) / record_size;
 163     if (nrec == 0)
 164         return 0;
 165 
 166     start = tar_current_block_ordinal (archive);
 167 
 168     offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
 169     if (offset < 0)
 170         return offset;
 171 
 172 #if 0
 173     if ((offset % record_size) != 0)
 174     {
 175         message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
 176         return -1;
 177     }
 178 #endif
 179 
 180     // Convert to number of records
 181     offset /= BLOCKSIZE;
 182     // Compute number of skipped blocks
 183     nblk = offset - start;
 184 
 185     // Update buffering info
 186     record_start_block = offset - blocking_factor;
 187     current_block = record_end;
 188 
 189     return nblk;
 190 }
 191 
 192 /* --------------------------------------------------------------------------------------------- */
 193 /*** public functions ****************************************************************************/
 194 /* --------------------------------------------------------------------------------------------- */
 195 
 196 gboolean
 197 is_octal_digit (char c)
     /*   */
 198 {
 199     return '0' <= c && c <= '7';
 200 }
 201 
 202 /* --------------------------------------------------------------------------------------------- */
 203 
 204 void
 205 tar_assign_string (char **string, char *value)
     /*   */
 206 {
 207     g_free (*string);
 208     *string = value;
 209 }
 210 
 211 /* --------------------------------------------------------------------------------------------- */
 212 
 213 void
 214 tar_assign_string_dup (char **string, const char *value)
     /*   */
 215 {
 216     g_free (*string);
 217     *string = g_strdup (value);
 218 }
 219 
 220 /* --------------------------------------------------------------------------------------------- */
 221 
 222 void
 223 tar_assign_string_dup_n (char **string, const char *value, size_t n)
     /*   */
 224 {
 225     g_free (*string);
 226     *string = g_strndup (value, n);
 227 }
 228 
 229 /* --------------------------------------------------------------------------------------------- */
 230 
 231 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
 232    to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
 233    the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
 234    value. @minval must not be positive.
 235 
 236    If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
 237    are assumed to be represented using leading '-' in the usual way. If the represented value
 238    exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
 239    value.
 240 
 241    On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
 242    then return 0.
 243 
 244    Sample call to this function:
 245 
 246    char *s_end;
 247    gboolean overflow;
 248    idx_t i;
 249 
 250    i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
 251    if ((s_end == s) | (s_end == '\0') | overflow)
 252    diagnose_invalid (s);
 253 
 254    This example uses "|" instead of "||" for fewer branches at runtime,
 255    which tends to be more efficient on modern processors.
 256 
 257    This function is named "stoint" instead of "strtoint" because
 258    <string.h> reserves names beginning with "str".
 259  */
 260 #if !(INTMAX_MAX <= UINTMAX_MAX)
 261 #    error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
 262 #endif
 263 intmax_t
 264 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
     /*   */
 265 {
 266     char const *p = arg;
 267     intmax_t i;
 268     int v = 0;
 269 
 270     if (isdigit (*p))
 271     {
 272         if (minval <= 0)
 273         {
 274             i = *p - '0';
 275 
 276             while (isdigit (*++p) != 0)
 277             {
 278                 v |= ckd_mul (&i, i, 10) ? 1 : 0;
 279                 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
 280             }
 281 
 282             v |= maxval < (uintmax_t) i ? 1 : 0;
 283             if (v != 0)
 284                 i = maxval;
 285         }
 286         else
 287         {
 288             uintmax_t u = *p - '0';
 289 
 290             while (isdigit (*++p) != 0)
 291             {
 292                 v |= ckd_mul (&u, u, 10) ? 1 : 0;
 293                 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
 294             }
 295 
 296             v |= maxval < u ? 1 : 0;
 297             if (v != 0)
 298                 u = maxval;
 299             i = tar_represent_uintmax (u);
 300         }
 301     }
 302     else if (minval < 0 && *p == '-' && isdigit (p[1]))
 303     {
 304         p++;
 305         i = -(*p - '0');
 306 
 307         while (isdigit (*++p) != 0)
 308         {
 309             v |= ckd_mul (&i, i, 10) ? 1 : 0;
 310             v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
 311         }
 312 
 313         v |= i < minval ? 1 : 0;
 314         if (v != 0)
 315             i = minval;
 316     }
 317     else
 318         i = 0;
 319 
 320     if (arglim != NULL)
 321         *arglim = (char *) p;
 322     if (overflow != NULL)
 323         *overflow = v != 0;
 324     return i;
 325 }
 326 
 327 /* --------------------------------------------------------------------------------------------- */
 328 
 329 /**
 330  * Convert buffer at @where0 of size @digs from external format to intmax_t.
 331  * @digs must be positive.
 332  * If @type is non-NULL, data are of type @type.
 333  * The buffer must represent a value in the range -@minval through @maxval;
 334  * if the mathematically correct result V would be greater than INTMAX_MAX,
 335  * return a negative integer V such that (uintmax_t) V yields the correct result.
 336  * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 337  *
 338  * Result is -1 if the field is invalid.
 339  */
 340 #if !(INTMAX_MAX <= UINTMAX_MAX && -(INTMAX_MIN + 1) <= UINTMAX_MAX)
 341 #    error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
 342 #endif
 343 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
 344 #    error "tar_from_header() returns intmax_t to represent uintmax_t"
 345 #endif
 346 intmax_t
 347 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
     /*   */
 348                  uintmax_t maxval, gboolean octal_only)
 349 {
 350     uintmax_t value = 0;
 351     uintmax_t uminval = minval;
 352     uintmax_t minus_minval = -uminval;
 353     const char *where = where0;
 354     char const *lim = where + digs;
 355     gboolean negative = FALSE;
 356 
 357     /* Accommodate buggy tar of unknown vintage, which outputs leading
 358        NUL if the previous field overflows. */
 359     if (*where == '\0')
 360         where++;
 361 
 362     // Accommodate older tars, which output leading spaces.
 363     while (TRUE)
 364     {
 365         if (where == lim)
 366             return (-1);
 367 
 368         if (!g_ascii_isspace (*where))
 369             break;
 370 
 371         where++;
 372     }
 373 
 374     if (is_octal_digit (*where))
 375     {
 376         char const *where1 = where;
 377         gboolean overflow = FALSE;
 378 
 379         while (TRUE)
 380         {
 381             value += *where++ - '0';
 382             if (where == lim || !is_octal_digit (*where))
 383                 break;
 384             overflow |= ckd_mul (&value, value, 8);
 385         }
 386 
 387         /* Parse the output of older, unportable tars, which generate
 388            negative values in two's complement octal. If the leading
 389            nonzero digit is 1, we can't recover the original value
 390            reliably; so do this only if the digit is 2 or more. This
 391            catches the common case of 32-bit negative time stamps. */
 392         if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
 393         {
 394             // Compute the negative of the input value, assuming two's complement.
 395             int digit;
 396 
 397             digit = (*where1 - '0') | 4;
 398             overflow = FALSE;
 399             value = 0;
 400             where = where1;
 401 
 402             while (TRUE)
 403             {
 404                 value += 7 - digit;
 405                 where++;
 406                 if (where == lim || !is_octal_digit (*where))
 407                     break;
 408                 digit = *where - '0';
 409                 overflow |= ckd_mul (&value, value, 8);
 410             }
 411 
 412             overflow |= ckd_add (&value, value, 1);
 413 
 414             if (!overflow && value <= minus_minval)
 415                 negative = TRUE;
 416         }
 417 
 418         if (overflow)
 419             return (-1);
 420     }
 421     else if (octal_only)
 422     {
 423         // Suppress the following extensions.
 424     }
 425     else if (*where == '-' || *where == '+')
 426     {
 427         /* Parse base-64 output produced only by tar test versions
 428            1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
 429            Support for this will be withdrawn in future tar releases. */
 430 
 431         negative = *where++ == '-';
 432 
 433         while (where != lim)
 434         {
 435             unsigned char uc = *where;
 436             char dig;
 437 
 438             dig = base64_map[uc];
 439             if (dig <= 0)
 440                 break;
 441 
 442             if (ckd_mul (&value, value, 64))
 443                 return (-1);
 444             value |= dig - 1;
 445             where++;
 446         }
 447     }
 448     else if (where <= lim - 2
 449              && (*where == '\200'  // positive base-256
 450                  || *where == '\377' /* negative base-256 */))
 451     {
 452         /* Parse base-256 output.  A nonnegative number N is
 453            represented as (256**DIGS)/2 + N; a negative number -N is
 454            represented as (256**DIGS) - N, i.e. as two's complement.
 455            The representation guarantees that the leading bit is
 456            always on, so that we don't confuse this format with the
 457            others (assuming ASCII bytes of 8 bits or more). */
 458 
 459         int signbit;
 460         uintmax_t topbits;
 461 
 462         signbit = *where & (1 << (LG_256 - 2));
 463         topbits = ((uintmax_t) -signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
 464 
 465         value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
 466 
 467         while (TRUE)
 468         {
 469             unsigned char uc;
 470 
 471             uc = *where++;
 472             value = (value << LG_256) + uc;
 473             if (where == lim)
 474                 break;
 475 
 476             if (((value << LG_256 >> LG_256) | topbits) != value)
 477                 return (-1);
 478         }
 479 
 480         negative = signbit != 0;
 481         if (negative)
 482             value = -value;
 483     }
 484 
 485     if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
 486         return (-1);
 487 
 488     if (value <= (negative ? minus_minval : maxval))
 489         return tar_represent_uintmax (negative ? -value : value);
 490 
 491     return (-1);
 492 }
 493 
 494 /* --------------------------------------------------------------------------------------------- */
 495 
 496 off_t
 497 off_from_header (const char *p, size_t s)
     /*   */
 498 {
 499     /* Negative offsets are not allowed in tar files, so invoke
 500        from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
 501     return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
 502 }
 503 
 504 /* --------------------------------------------------------------------------------------------- */
 505 
 506 /**
 507  * Return the location of the next available input or output block.
 508  * Return NULL for EOF.
 509  */
 510 union block *
 511 tar_find_next_block (tar_super_t *archive)
     /*   */
 512 {
 513     if (current_block == record_end)
 514     {
 515         if (hit_eof)
 516             return NULL;
 517 
 518         if (!tar_flush_archive (archive))
 519         {
 520             message (D_ERROR, MSG_ERROR, _ ("Inconsistent tar archive"));
 521             return NULL;
 522         }
 523 
 524         if (current_block == record_end)
 525         {
 526             hit_eof = TRUE;
 527             return NULL;
 528         }
 529     }
 530 
 531     return current_block;
 532 }
 533 
 534 /* --------------------------------------------------------------------------------------------- */
 535 
 536 /**
 537  * Indicate that we have used all blocks up thru @block.
 538  */
 539 gboolean
 540 tar_set_next_block_after (union block *block)
     /*   */
 541 {
 542     while (block >= current_block)
 543         current_block++;
 544 
 545     /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
 546        could mean the next block (if the input record is exactly one block long), which is not
 547        what is intended.  */
 548 
 549     return !(current_block > record_end);
 550 }
 551 
 552 /* --------------------------------------------------------------------------------------------- */
 553 
 554 /**
 555  * Compute and return the block ordinal at current_block.
 556  */
 557 off_t
 558 tar_current_block_ordinal (const tar_super_t *archive)
     /*   */
 559 {
 560     return record_start_block + (current_block - archive->record_start);
 561 }
 562 
 563 /* --------------------------------------------------------------------------------------------- */
 564 
 565 /**
 566  * Skip over @size bytes of data in blocks in the archive.
 567  */
 568 gboolean
 569 tar_skip_file (tar_super_t *archive, off_t size)
     /*   */
 570 {
 571     union block *x;
 572     off_t nblk;
 573 
 574     nblk = tar_seek_archive (archive, size);
 575     if (nblk >= 0)
 576         size -= nblk * BLOCKSIZE;
 577 
 578     while (size > 0)
 579     {
 580         x = tar_find_next_block (archive);
 581         if (x == NULL)
 582             return FALSE;
 583 
 584         tar_set_next_block_after (x);
 585         size -= BLOCKSIZE;
 586     }
 587 
 588     return TRUE;
 589 }
 590 
 591 /* --------------------------------------------------------------------------------------------- */
/* */
root/src/vfs/tar/tar-internal.c

DEFINITIONS