root/src/vfs/tar/tar-internal.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. tar_short_read
  2. tar_flush_read
  3. tar_flush_archive
  4. tar_seek_archive
  5. is_octal_digit
  6. tar_assign_string
  7. tar_assign_string_dup
  8. tar_assign_string_dup_n
  9. stoint
  10. tar_from_header
  11. off_from_header
  12. tar_find_next_block
  13. tar_set_next_block_after
  14. tar_current_block_ordinal
  15. tar_skip_file

   1 /*
   2    Virtual File System: GNU Tar file system.
   3 
   4    Copyright (C) 2023-2025
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Andrew Borodin <aborodin@vmail.ru>, 2023
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25 
  26 /**
  27  * \file
  28  * \brief Source: Virtual File System: GNU Tar file system
  29  * \author Andrew Borodin
  30  * \date 2022
  31  */
  32 
  33 #include <config.h>
  34 
  35 #include <ctype.h>              /* isdigit() */
  36 #include <inttypes.h>           /* uintmax_t */
  37 
  38 #include "lib/global.h"
  39 #include "lib/widget.h"         /* message() */
  40 #include "lib/vfs/vfs.h"        /* mc_read() */
  41 
  42 #include "tar-internal.h"
  43 
  44 /*** global variables ****************************************************************************/
  45 
  46 /*** file scope macro definitions ****************************************************************/
  47 
  48 #ifndef UINTMAX_WIDTH
  49 #define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
  50 #endif
  51 
  52 /* Log base 2 of common values. */
  53 #define LG_256 8
  54 
  55 /*** file scope type declarations ****************************************************************/
  56 
  57 /*** forward declarations (file scope functions) *************************************************/
  58 
  59 /*** file scope variables ************************************************************************/
  60 
  61 /* Table of base-64 digit values + 1, indexed by unsigned chars.
  62    See Internet RFC 2045 Table 1.
  63    Zero entries are for unsigned chars that are not base-64 digits.  */
  64 /* *INDENT-OFF* */
  65 static char const base64_map[UCHAR_MAX + 1] =
  66 {
  67     ['A'] =  0 + 1, ['B'] =  1 + 1, ['C'] =  2 + 1, ['D'] =  3 + 1,
  68     ['E'] =  4 + 1, ['F'] =  5 + 1, ['G'] =  6 + 1, ['H'] =  7 + 1,
  69     ['I'] =  8 + 1, ['J'] =  9 + 1, ['K'] = 10 + 1, ['L'] = 11 + 1,
  70     ['M'] = 12 + 1, ['N'] = 13 + 1, ['O'] = 14 + 1, ['P'] = 15 + 1,
  71     ['Q'] = 16 + 1, ['R'] = 17 + 1, ['S'] = 18 + 1, ['T'] = 19 + 1,
  72     ['U'] = 20 + 1, ['V'] = 21 + 1, ['W'] = 22 + 1, ['X'] = 23 + 1,
  73     ['Y'] = 24 + 1, ['Z'] = 25 + 1,
  74     ['a'] = 26 + 1, ['b'] = 27 + 1, ['c'] = 28 + 1, ['d'] = 29 + 1,
  75     ['e'] = 30 + 1, ['f'] = 31 + 1, ['g'] = 32 + 1, ['h'] = 33 + 1,
  76     ['i'] = 34 + 1, ['j'] = 35 + 1, ['k'] = 36 + 1, ['l'] = 37 + 1,
  77     ['m'] = 38 + 1, ['n'] = 39 + 1, ['o'] = 40 + 1, ['p'] = 41 + 1,
  78     ['q'] = 42 + 1, ['r'] = 43 + 1, ['s'] = 44 + 1, ['t'] = 45 + 1,
  79     ['u'] = 46 + 1, ['v'] = 47 + 1, ['w'] = 48 + 1, ['x'] = 49 + 1,
  80     ['y'] = 50 + 1, ['z'] = 51 + 1,
  81     ['0'] = 52 + 1, ['1'] = 53 + 1, ['2'] = 54 + 1, ['3'] = 55 + 1,
  82     ['4'] = 56 + 1, ['5'] = 57 + 1, ['6'] = 58 + 1, ['7'] = 59 + 1,
  83     ['8'] = 60 + 1, ['9'] = 61 + 1,
  84     ['+'] = 62 + 1, ['/'] = 63 + 1,
  85 };
  86 /* *INDENT-ON* */
  87 
  88 /* --------------------------------------------------------------------------------------------- */
  89 /*** file scope functions ************************************************************************/
  90 /* --------------------------------------------------------------------------------------------- */
  91 
  92 static gboolean
  93 tar_short_read (size_t status, tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
  94 {
  95     size_t left;                /* bytes left */
  96     char *more;                 /* pointer to next byte to read */
  97 
  98     more = archive->record_start->buffer + status;
  99     left = record_size - status;
 100 
 101     while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
 102     {
 103         if (status != 0)
 104         {
 105             ssize_t r;
 106 
 107             r = mc_read (archive->fd, more, left);
 108             if (r == -1)
 109                 return FALSE;
 110 
 111             status = (size_t) r;
 112         }
 113 
 114         if (status == 0)
 115             break;
 116 
 117         left -= status;
 118         more += status;
 119     }
 120 
 121     record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
 122 
 123     return TRUE;
 124 }
 125 
 126 /* --------------------------------------------------------------------------------------------- */
 127 
 128 static gboolean
 129 tar_flush_read (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 130 {
 131     size_t status;
 132 
 133     status = mc_read (archive->fd, archive->record_start->buffer, record_size);
 134     if ((idx_t) status == record_size)
 135         return TRUE;
 136 
 137     return tar_short_read (status, archive);
 138 }
 139 
 140 /* --------------------------------------------------------------------------------------------- */
 141 
 142 /**  Flush the current buffer from the archive.
 143  */
 144 static gboolean
 145 tar_flush_archive (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 146 {
 147     record_start_block += record_end - archive->record_start;
 148     current_block = archive->record_start;
 149     record_end = archive->record_start + blocking_factor;
 150 
 151     return tar_flush_read (archive);
 152 }
 153 
 154 /* --------------------------------------------------------------------------------------------- */
 155 
 156 static off_t
 157 tar_seek_archive (tar_super_t *archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 158 {
 159     off_t start, offset;
 160     off_t nrec, nblk;
 161     off_t skipped;
 162 
 163     /* If low level I/O is already at EOF, do not try to seek further. */
 164     if (record_end < archive->record_start + blocking_factor)
 165         return 0;
 166 
 167     skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
 168     if (size <= skipped)
 169         return 0;
 170 
 171     /* Compute number of records to skip */
 172     nrec = (size - skipped) / record_size;
 173     if (nrec == 0)
 174         return 0;
 175 
 176     start = tar_current_block_ordinal (archive);
 177 
 178     offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
 179     if (offset < 0)
 180         return offset;
 181 
 182 #if 0
 183     if ((offset % record_size) != 0)
 184     {
 185         message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
 186         return -1;
 187     }
 188 #endif
 189 
 190     /* Convert to number of records */
 191     offset /= BLOCKSIZE;
 192     /* Compute number of skipped blocks */
 193     nblk = offset - start;
 194 
 195     /* Update buffering info */
 196     record_start_block = offset - blocking_factor;
 197     current_block = record_end;
 198 
 199     return nblk;
 200 }
 201 
 202 /* --------------------------------------------------------------------------------------------- */
 203 /*** public functions ****************************************************************************/
 204 /* --------------------------------------------------------------------------------------------- */
 205 
 206 gboolean
 207 is_octal_digit (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 208 {
 209     return '0' <= c && c <= '7';
 210 }
 211 
 212 /* --------------------------------------------------------------------------------------------- */
 213 
 214 void
 215 tar_assign_string (char **string, char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 216 {
 217     g_free (*string);
 218     *string = value;
 219 }
 220 
 221 /* --------------------------------------------------------------------------------------------- */
 222 
 223 void
 224 tar_assign_string_dup (char **string, const char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 225 {
 226     g_free (*string);
 227     *string = g_strdup (value);
 228 }
 229 
 230 /* --------------------------------------------------------------------------------------------- */
 231 
 232 void
 233 tar_assign_string_dup_n (char **string, const char *value, size_t n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 234 {
 235     g_free (*string);
 236     *string = g_strndup (value, n);
 237 }
 238 
 239 /* --------------------------------------------------------------------------------------------- */
 240 
 241 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
 242    to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
 243    the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
 244    value. @minval must not be positive.
 245 
 246    If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
 247    are assumed to be represented using leading '-' in the usual way. If the represented value
 248    exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
 249    value.
 250 
 251    On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
 252    then return 0.
 253 
 254    Sample call to this function:
 255 
 256    char *s_end;
 257    gboolean overflow;
 258    idx_t i;
 259 
 260    i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
 261    if ((s_end == s) | (s_end == '\0') | overflow)
 262    diagnose_invalid (s);
 263 
 264    This example uses "|" instead of "||" for fewer branches at runtime,
 265    which tends to be more efficient on modern processors.
 266 
 267    This function is named "stoint" instead of "strtoint" because
 268    <string.h> reserves names beginning with "str".
 269  */
 270 #if ! (INTMAX_MAX <= UINTMAX_MAX)
 271 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
 272 #endif
 273 intmax_t
 274 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
     /* [previous][next][first][last][top][bottom][index][help]  */
 275 {
 276     char const *p = arg;
 277     intmax_t i;
 278     int v = 0;
 279 
 280     if (isdigit (*p))
 281     {
 282         if (minval <= 0)
 283         {
 284             i = *p - '0';
 285 
 286             while (isdigit (*++p) != 0)
 287             {
 288                 v |= ckd_mul (&i, i, 10) ? 1 : 0;
 289                 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
 290             }
 291 
 292             v |= maxval < (uintmax_t) i ? 1 : 0;
 293             if (v != 0)
 294                 i = maxval;
 295         }
 296         else
 297         {
 298             uintmax_t u = *p - '0';
 299 
 300             while (isdigit (*++p) != 0)
 301             {
 302                 v |= ckd_mul (&u, u, 10) ? 1 : 0;
 303                 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
 304             }
 305 
 306             v |= maxval < u ? 1 : 0;
 307             if (v != 0)
 308                 u = maxval;
 309             i = tar_represent_uintmax (u);
 310         }
 311     }
 312     else if (minval < 0 && *p == '-' && isdigit (p[1]))
 313     {
 314         p++;
 315         i = -(*p - '0');
 316 
 317         while (isdigit (*++p) != 0)
 318         {
 319             v |= ckd_mul (&i, i, 10) ? 1 : 0;
 320             v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
 321         }
 322 
 323         v |= i < minval ? 1 : 0;
 324         if (v != 0)
 325             i = minval;
 326     }
 327     else
 328         i = 0;
 329 
 330     if (arglim != NULL)
 331         *arglim = (char *) p;
 332     if (overflow != NULL)
 333         *overflow = v != 0;
 334     return i;
 335 }
 336 
 337 /* --------------------------------------------------------------------------------------------- */
 338 
 339 /**
 340  * Convert buffer at @where0 of size @digs from external format to intmax_t.
 341  * @digs must be positive.
 342  * If @type is non-NULL, data are of type @type.
 343  * The buffer must represent a value in the range -@minval through @maxval;
 344  * if the mathematically correct result V would be greater than INTMAX_MAX,
 345  * return a negative integer V such that (uintmax_t) V yields the correct result.
 346  * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 347  *
 348  * Result is -1 if the field is invalid.
 349  */
 350 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
 351 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
 352 #endif
 353 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
 354 #error "tar_from_header() returns intmax_t to represent uintmax_t"
 355 #endif
 356 intmax_t
 357 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
     /* [previous][next][first][last][top][bottom][index][help]  */
 358                  uintmax_t maxval, gboolean octal_only)
 359 {
 360     uintmax_t value = 0;
 361     uintmax_t uminval = minval;
 362     uintmax_t minus_minval = -uminval;
 363     const char *where = where0;
 364     char const *lim = where + digs;
 365     gboolean negative = FALSE;
 366 
 367     /* Accommodate buggy tar of unknown vintage, which outputs leading
 368        NUL if the previous field overflows. */
 369     if (*where == '\0')
 370         where++;
 371 
 372     /* Accommodate older tars, which output leading spaces. */
 373     while (TRUE)
 374     {
 375         if (where == lim)
 376             return (-1);
 377 
 378         if (!g_ascii_isspace (*where))
 379             break;
 380 
 381         where++;
 382     }
 383 
 384     if (is_octal_digit (*where))
 385     {
 386         char const *where1 = where;
 387         gboolean overflow = FALSE;
 388 
 389         while (TRUE)
 390         {
 391             value += *where++ - '0';
 392             if (where == lim || !is_octal_digit (*where))
 393                 break;
 394             overflow |= ckd_mul (&value, value, 8);
 395         }
 396 
 397         /* Parse the output of older, unportable tars, which generate
 398            negative values in two's complement octal. If the leading
 399            nonzero digit is 1, we can't recover the original value
 400            reliably; so do this only if the digit is 2 or more. This
 401            catches the common case of 32-bit negative time stamps. */
 402         if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
 403         {
 404             /* Compute the negative of the input value, assuming two's complement. */
 405             int digit;
 406 
 407             digit = (*where1 - '0') | 4;
 408             overflow = FALSE;
 409             value = 0;
 410             where = where1;
 411 
 412             while (TRUE)
 413             {
 414                 value += 7 - digit;
 415                 where++;
 416                 if (where == lim || !is_octal_digit (*where))
 417                     break;
 418                 digit = *where - '0';
 419                 overflow |= ckd_mul (&value, value, 8);
 420             }
 421 
 422             overflow |= ckd_add (&value, value, 1);
 423 
 424             if (!overflow && value <= minus_minval)
 425                 negative = TRUE;
 426         }
 427 
 428         if (overflow)
 429             return (-1);
 430     }
 431     else if (octal_only)
 432     {
 433         /* Suppress the following extensions. */
 434     }
 435     else if (*where == '-' || *where == '+')
 436     {
 437         /* Parse base-64 output produced only by tar test versions
 438            1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
 439            Support for this will be withdrawn in future tar releases. */
 440 
 441         negative = *where++ == '-';
 442 
 443         while (where != lim)
 444         {
 445             unsigned char uc = *where;
 446             char dig;
 447 
 448             dig = base64_map[uc];
 449             if (dig <= 0)
 450                 break;
 451 
 452             if (ckd_mul (&value, value, 64))
 453                 return (-1);
 454             value |= dig - 1;
 455             where++;
 456         }
 457     }
 458     else if (where <= lim - 2 && (*where == '\200'      /* positive base-256 */
 459                                   || *where == '\377' /* negative base-256 */ ))
 460     {
 461         /* Parse base-256 output.  A nonnegative number N is
 462            represented as (256**DIGS)/2 + N; a negative number -N is
 463            represented as (256**DIGS) - N, i.e. as two's complement.
 464            The representation guarantees that the leading bit is
 465            always on, so that we don't confuse this format with the
 466            others (assuming ASCII bytes of 8 bits or more). */
 467 
 468         int signbit;
 469         uintmax_t topbits;
 470 
 471         signbit = *where & (1 << (LG_256 - 2));
 472         topbits = ((uintmax_t) - signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
 473 
 474         value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
 475 
 476         while (TRUE)
 477         {
 478             unsigned char uc;
 479 
 480             uc = *where++;
 481             value = (value << LG_256) + uc;
 482             if (where == lim)
 483                 break;
 484 
 485             if (((value << LG_256 >> LG_256) | topbits) != value)
 486                 return (-1);
 487         }
 488 
 489         negative = signbit != 0;
 490         if (negative)
 491             value = -value;
 492     }
 493 
 494     if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
 495         return (-1);
 496 
 497     if (value <= (negative ? minus_minval : maxval))
 498         return tar_represent_uintmax (negative ? -value : value);
 499 
 500     return (-1);
 501 }
 502 
 503 /* --------------------------------------------------------------------------------------------- */
 504 
 505 off_t
 506 off_from_header (const char *p, size_t s)
     /* [previous][next][first][last][top][bottom][index][help]  */
 507 {
 508     /* Negative offsets are not allowed in tar files, so invoke
 509        from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
 510     return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
 511 }
 512 
 513 /* --------------------------------------------------------------------------------------------- */
 514 
 515 /**
 516  * Return the location of the next available input or output block.
 517  * Return NULL for EOF.
 518  */
 519 union block *
 520 tar_find_next_block (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 521 {
 522     if (current_block == record_end)
 523     {
 524         if (hit_eof)
 525             return NULL;
 526 
 527         if (!tar_flush_archive (archive))
 528         {
 529             message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
 530             return NULL;
 531         }
 532 
 533         if (current_block == record_end)
 534         {
 535             hit_eof = TRUE;
 536             return NULL;
 537         }
 538     }
 539 
 540     return current_block;
 541 }
 542 
 543 /* --------------------------------------------------------------------------------------------- */
 544 
 545 /**
 546  * Indicate that we have used all blocks up thru @block.
 547  */
 548 gboolean
 549 tar_set_next_block_after (union block *block)
     /* [previous][next][first][last][top][bottom][index][help]  */
 550 {
 551     while (block >= current_block)
 552         current_block++;
 553 
 554     /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
 555        could mean the next block (if the input record is exactly one block long), which is not
 556        what is intended.  */
 557 
 558     return !(current_block > record_end);
 559 }
 560 
 561 /* --------------------------------------------------------------------------------------------- */
 562 
 563 /**
 564  * Compute and return the block ordinal at current_block.
 565  */
 566 off_t
 567 tar_current_block_ordinal (const tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 568 {
 569     return record_start_block + (current_block - archive->record_start);
 570 }
 571 
 572 /* --------------------------------------------------------------------------------------------- */
 573 
 574 /**
 575  * Skip over @size bytes of data in blocks in the archive.
 576  */
 577 gboolean
 578 tar_skip_file (tar_super_t *archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 579 {
 580     union block *x;
 581     off_t nblk;
 582 
 583     nblk = tar_seek_archive (archive, size);
 584     if (nblk >= 0)
 585         size -= nblk * BLOCKSIZE;
 586 
 587     while (size > 0)
 588     {
 589         x = tar_find_next_block (archive);
 590         if (x == NULL)
 591             return FALSE;
 592 
 593         tar_set_next_block_after (x);
 594         size -= BLOCKSIZE;
 595     }
 596 
 597     return TRUE;
 598 }
 599 
 600 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */