root/src/vfs/tar/tar-internal.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. tar_short_read
  2. tar_flush_read
  3. tar_flush_archive
  4. tar_seek_archive
  5. is_octal_digit
  6. tar_assign_string
  7. tar_assign_string_dup
  8. tar_assign_string_dup_n
  9. stoint
  10. tar_from_header
  11. off_from_header
  12. tar_find_next_block
  13. tar_set_next_block_after
  14. tar_current_block_ordinal
  15. tar_skip_file

   1 /*
   2    Virtual File System: GNU Tar file system.
   3 
   4    Copyright (C) 2023-2024
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Andrew Borodin <aborodin@vmail.ru>, 2023
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25 
  26 /**
  27  * \file
  28  * \brief Source: Virtual File System: GNU Tar file system
  29  * \author Andrew Borodin
  30  * \date 2022
  31  */
  32 
  33 #include <config.h>
  34 
  35 #include <ctype.h>              /* isdigit() */
  36 #include <inttypes.h>           /* uintmax_t */
  37 
  38 #include "lib/global.h"
  39 #include "lib/widget.h"         /* message() */
  40 #include "lib/vfs/vfs.h"        /* mc_read() */
  41 
  42 #include "tar-internal.h"
  43 
  44 /*** global variables ****************************************************************************/
  45 
  46 /*** file scope macro definitions ****************************************************************/
  47 
  48 #ifndef UINTMAX_WIDTH
  49 #define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
  50 #endif
  51 
  52 /* Log base 2 of common values. */
  53 #define LG_8 3
  54 #define LG_256 8
  55 
  56 /*** file scope type declarations ****************************************************************/
  57 
  58 /*** forward declarations (file scope functions) *************************************************/
  59 
  60 /*** file scope variables ************************************************************************/
  61 
  62 /* Table of base-64 digit values + 1, indexed by unsigned chars.
  63    See Internet RFC 2045 Table 1.
  64    Zero entries are for unsigned chars that are not base-64 digits.  */
  65 /* *INDENT-OFF* */
  66 static char const base64_map[UCHAR_MAX + 1] =
  67 {
  68     ['A'] =  0 + 1, ['B'] =  1 + 1, ['C'] =  2 + 1, ['D'] =  3 + 1,
  69     ['E'] =  4 + 1, ['F'] =  5 + 1, ['G'] =  6 + 1, ['H'] =  7 + 1,
  70     ['I'] =  8 + 1, ['J'] =  9 + 1, ['K'] = 10 + 1, ['L'] = 11 + 1,
  71     ['M'] = 12 + 1, ['N'] = 13 + 1, ['O'] = 14 + 1, ['P'] = 15 + 1,
  72     ['Q'] = 16 + 1, ['R'] = 17 + 1, ['S'] = 18 + 1, ['T'] = 19 + 1,
  73     ['U'] = 20 + 1, ['V'] = 21 + 1, ['W'] = 22 + 1, ['X'] = 23 + 1,
  74     ['Y'] = 24 + 1, ['Z'] = 25 + 1,
  75     ['a'] = 26 + 1, ['b'] = 27 + 1, ['c'] = 28 + 1, ['d'] = 29 + 1,
  76     ['e'] = 30 + 1, ['f'] = 31 + 1, ['g'] = 32 + 1, ['h'] = 33 + 1,
  77     ['i'] = 34 + 1, ['j'] = 35 + 1, ['k'] = 36 + 1, ['l'] = 37 + 1,
  78     ['m'] = 38 + 1, ['n'] = 39 + 1, ['o'] = 40 + 1, ['p'] = 41 + 1,
  79     ['q'] = 42 + 1, ['r'] = 43 + 1, ['s'] = 44 + 1, ['t'] = 45 + 1,
  80     ['u'] = 46 + 1, ['v'] = 47 + 1, ['w'] = 48 + 1, ['x'] = 49 + 1,
  81     ['y'] = 50 + 1, ['z'] = 51 + 1,
  82     ['0'] = 52 + 1, ['1'] = 53 + 1, ['2'] = 54 + 1, ['3'] = 55 + 1,
  83     ['4'] = 56 + 1, ['5'] = 57 + 1, ['6'] = 58 + 1, ['7'] = 59 + 1,
  84     ['8'] = 60 + 1, ['9'] = 61 + 1,
  85     ['+'] = 62 + 1, ['/'] = 63 + 1,
  86 };
  87 /* *INDENT-ON* */
  88 
  89 /* --------------------------------------------------------------------------------------------- */
  90 /*** file scope functions ************************************************************************/
  91 /* --------------------------------------------------------------------------------------------- */
  92 
  93 static gboolean
  94 tar_short_read (size_t status, tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
  95 {
  96     size_t left;                /* bytes left */
  97     char *more;                 /* pointer to next byte to read */
  98 
  99     more = archive->record_start->buffer + status;
 100     left = record_size - status;
 101 
 102     while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
 103     {
 104         if (status != 0)
 105         {
 106             ssize_t r;
 107 
 108             r = mc_read (archive->fd, more, left);
 109             if (r == -1)
 110                 return FALSE;
 111 
 112             status = (size_t) r;
 113         }
 114 
 115         if (status == 0)
 116             break;
 117 
 118         left -= status;
 119         more += status;
 120     }
 121 
 122     record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
 123 
 124     return TRUE;
 125 }
 126 
 127 /* --------------------------------------------------------------------------------------------- */
 128 
 129 static gboolean
 130 tar_flush_read (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 131 {
 132     size_t status;
 133 
 134     status = mc_read (archive->fd, archive->record_start->buffer, record_size);
 135     if ((idx_t) status == record_size)
 136         return TRUE;
 137 
 138     return tar_short_read (status, archive);
 139 }
 140 
 141 /* --------------------------------------------------------------------------------------------- */
 142 
 143 /**  Flush the current buffer from the archive.
 144  */
 145 static gboolean
 146 tar_flush_archive (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 147 {
 148     record_start_block += record_end - archive->record_start;
 149     current_block = archive->record_start;
 150     record_end = archive->record_start + blocking_factor;
 151 
 152     return tar_flush_read (archive);
 153 }
 154 
 155 /* --------------------------------------------------------------------------------------------- */
 156 
 157 static off_t
 158 tar_seek_archive (tar_super_t *archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 159 {
 160     off_t start, offset;
 161     off_t nrec, nblk;
 162     off_t skipped;
 163 
 164     /* If low level I/O is already at EOF, do not try to seek further. */
 165     if (record_end < archive->record_start + blocking_factor)
 166         return 0;
 167 
 168     skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
 169     if (size <= skipped)
 170         return 0;
 171 
 172     /* Compute number of records to skip */
 173     nrec = (size - skipped) / record_size;
 174     if (nrec == 0)
 175         return 0;
 176 
 177     start = tar_current_block_ordinal (archive);
 178 
 179     offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
 180     if (offset < 0)
 181         return offset;
 182 
 183 #if 0
 184     if ((offset % record_size) != 0)
 185     {
 186         message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
 187         return -1;
 188     }
 189 #endif
 190 
 191     /* Convert to number of records */
 192     offset /= BLOCKSIZE;
 193     /* Compute number of skipped blocks */
 194     nblk = offset - start;
 195 
 196     /* Update buffering info */
 197     record_start_block = offset - blocking_factor;
 198     current_block = record_end;
 199 
 200     return nblk;
 201 }
 202 
 203 /* --------------------------------------------------------------------------------------------- */
 204 /*** public functions ****************************************************************************/
 205 /* --------------------------------------------------------------------------------------------- */
 206 
 207 gboolean
 208 is_octal_digit (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 209 {
 210     return '0' <= c && c <= '7';
 211 }
 212 
 213 /* --------------------------------------------------------------------------------------------- */
 214 
 215 void
 216 tar_assign_string (char **string, char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 217 {
 218     g_free (*string);
 219     *string = value;
 220 }
 221 
 222 /* --------------------------------------------------------------------------------------------- */
 223 
 224 void
 225 tar_assign_string_dup (char **string, const char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 226 {
 227     g_free (*string);
 228     *string = g_strdup (value);
 229 }
 230 
 231 /* --------------------------------------------------------------------------------------------- */
 232 
 233 void
 234 tar_assign_string_dup_n (char **string, const char *value, size_t n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 235 {
 236     g_free (*string);
 237     *string = g_strndup (value, n);
 238 }
 239 
 240 /* --------------------------------------------------------------------------------------------- */
 241 
 242 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
 243    to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
 244    the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
 245    value. @minval must not be positive.
 246 
 247    If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
 248    are assumed to be represented using leading '-' in the usual way. If the represented value
 249    exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
 250    value.
 251 
 252    On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
 253    then return 0.
 254 
 255    Sample call to this function:
 256 
 257    char *s_end;
 258    gboolean overflow;
 259    idx_t i;
 260 
 261    i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
 262    if ((s_end == s) | (s_end == '\0') | overflow)
 263    diagnose_invalid (s);
 264 
 265    This example uses "|" instead of "||" for fewer branches at runtime,
 266    which tends to be more efficient on modern processors.
 267 
 268    This function is named "stoint" instead of "strtoint" because
 269    <string.h> reserves names beginning with "str".
 270  */
 271 #if ! (INTMAX_MAX <= UINTMAX_MAX)
 272 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
 273 #endif
 274 intmax_t
 275 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
     /* [previous][next][first][last][top][bottom][index][help]  */
 276 {
 277     char const *p = arg;
 278     intmax_t i;
 279     int v = 0;
 280 
 281     if (isdigit (*p))
 282     {
 283         if (minval <= 0)
 284         {
 285             i = *p - '0';
 286 
 287             while (isdigit (*++p) != 0)
 288             {
 289                 v |= ckd_mul (&i, i, 10) ? 1 : 0;
 290                 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
 291             }
 292 
 293             v |= maxval < (uintmax_t) i ? 1 : 0;
 294             if (v != 0)
 295                 i = maxval;
 296         }
 297         else
 298         {
 299             uintmax_t u = *p - '0';
 300 
 301             while (isdigit (*++p) != 0)
 302             {
 303                 v |= ckd_mul (&u, u, 10) ? 1 : 0;
 304                 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
 305             }
 306 
 307             v |= maxval < u ? 1 : 0;
 308             if (v != 0)
 309                 u = maxval;
 310             i = tar_represent_uintmax (u);
 311         }
 312     }
 313     else if (minval < 0 && *p == '-' && isdigit (p[1]))
 314     {
 315         p++;
 316         i = -(*p - '0');
 317 
 318         while (isdigit (*++p) != 0)
 319         {
 320             v |= ckd_mul (&i, i, 10) ? 1 : 0;
 321             v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
 322         }
 323 
 324         v |= i < minval ? 1 : 0;
 325         if (v != 0)
 326             i = minval;
 327     }
 328     else
 329         i = 0;
 330 
 331     if (arglim != NULL)
 332         *arglim = (char *) p;
 333     if (overflow != NULL)
 334         *overflow = v != 0;
 335     return i;
 336 }
 337 
 338 /* --------------------------------------------------------------------------------------------- */
 339 
 340 /**
 341  * Convert buffer at @where0 of size @digs from external format to intmax_t.
 342  * @digs must be positive.
 343  * If @type is non-NULL, data are of type @type.
 344  * The buffer must represent a value in the range -@minval through @maxval;
 345  * if the mathematically correct result V would be greater than INTMAX_MAX,
 346  * return a negative integer V such that (uintmax_t) V yields the correct result.
 347  * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 348  *
 349  * Result is -1 if the field is invalid.
 350  */
 351 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
 352 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
 353 #endif
 354 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
 355 #error "tar_from_header() returns intmax_t to represent uintmax_t"
 356 #endif
 357 intmax_t
 358 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
     /* [previous][next][first][last][top][bottom][index][help]  */
 359                  uintmax_t maxval, gboolean octal_only)
 360 {
 361     uintmax_t value = 0;
 362     uintmax_t uminval = minval;
 363     uintmax_t minus_minval = -uminval;
 364     const char *where = where0;
 365     char const *lim = where + digs;
 366     gboolean negative = FALSE;
 367 
 368     /* Accommodate buggy tar of unknown vintage, which outputs leading
 369        NUL if the previous field overflows. */
 370     if (*where == '\0')
 371         where++;
 372 
 373     /* Accommodate older tars, which output leading spaces. */
 374     while (TRUE)
 375     {
 376         if (where == lim)
 377             return (-1);
 378 
 379         if (!g_ascii_isspace (*where))
 380             break;
 381 
 382         where++;
 383     }
 384 
 385     if (is_octal_digit (*where))
 386     {
 387         char const *where1 = where;
 388         gboolean overflow = FALSE;
 389 
 390         while (TRUE)
 391         {
 392             value += *where++ - '0';
 393             if (where == lim || !is_octal_digit (*where))
 394                 break;
 395             overflow |= ckd_mul (&value, value, 8);
 396         }
 397 
 398         /* Parse the output of older, unportable tars, which generate
 399            negative values in two's complement octal. If the leading
 400            nonzero digit is 1, we can't recover the original value
 401            reliably; so do this only if the digit is 2 or more. This
 402            catches the common case of 32-bit negative time stamps. */
 403         if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
 404         {
 405             /* Compute the negative of the input value, assuming two's complement. */
 406             int digit;
 407 
 408             digit = (*where1 - '0') | 4;
 409             overflow = FALSE;
 410             value = 0;
 411             where = where1;
 412 
 413             while (TRUE)
 414             {
 415                 value += 7 - digit;
 416                 where++;
 417                 if (where == lim || !is_octal_digit (*where))
 418                     break;
 419                 digit = *where - '0';
 420                 overflow |= ckd_mul (&value, value, 8);
 421             }
 422 
 423             overflow |= ckd_add (&value, value, 1);
 424 
 425             if (!overflow && value <= minus_minval)
 426                 negative = TRUE;
 427         }
 428 
 429         if (overflow)
 430             return (-1);
 431     }
 432     else if (octal_only)
 433     {
 434         /* Suppress the following extensions. */
 435     }
 436     else if (*where == '-' || *where == '+')
 437     {
 438         /* Parse base-64 output produced only by tar test versions
 439            1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
 440            Support for this will be withdrawn in future tar releases. */
 441 
 442         negative = *where++ == '-';
 443 
 444         while (where != lim)
 445         {
 446             unsigned char uc = *where;
 447             char dig;
 448 
 449             dig = base64_map[uc];
 450             if (dig <= 0)
 451                 break;
 452 
 453             if (ckd_mul (&value, value, 64))
 454                 return (-1);
 455             value |= dig - 1;
 456             where++;
 457         }
 458     }
 459     else if (where <= lim - 2 && (*where == '\200'      /* positive base-256 */
 460                                   || *where == '\377' /* negative base-256 */ ))
 461     {
 462         /* Parse base-256 output.  A nonnegative number N is
 463            represented as (256**DIGS)/2 + N; a negative number -N is
 464            represented as (256**DIGS) - N, i.e. as two's complement.
 465            The representation guarantees that the leading bit is
 466            always on, so that we don't confuse this format with the
 467            others (assuming ASCII bytes of 8 bits or more). */
 468 
 469         int signbit;
 470         uintmax_t topbits;
 471 
 472         signbit = *where & (1 << (LG_256 - 2));
 473         topbits = ((uintmax_t) - signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
 474 
 475         value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
 476 
 477         while (TRUE)
 478         {
 479             unsigned char uc;
 480 
 481             uc = *where++;
 482             value = (value << LG_256) + uc;
 483             if (where == lim)
 484                 break;
 485 
 486             if (((value << LG_256 >> LG_256) | topbits) != value)
 487                 return (-1);
 488         }
 489 
 490         negative = signbit != 0;
 491         if (negative)
 492             value = -value;
 493     }
 494 
 495     if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
 496         return (-1);
 497 
 498     if (value <= (negative ? minus_minval : maxval))
 499         return tar_represent_uintmax (negative ? -value : value);
 500 
 501     return (-1);
 502 }
 503 
 504 /* --------------------------------------------------------------------------------------------- */
 505 
 506 off_t
 507 off_from_header (const char *p, size_t s)
     /* [previous][next][first][last][top][bottom][index][help]  */
 508 {
 509     /* Negative offsets are not allowed in tar files, so invoke
 510        from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
 511     return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
 512 }
 513 
 514 /* --------------------------------------------------------------------------------------------- */
 515 
 516 /**
 517  * Return the location of the next available input or output block.
 518  * Return NULL for EOF.
 519  */
 520 union block *
 521 tar_find_next_block (tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 522 {
 523     if (current_block == record_end)
 524     {
 525         if (hit_eof)
 526             return NULL;
 527 
 528         if (!tar_flush_archive (archive))
 529         {
 530             message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
 531             return NULL;
 532         }
 533 
 534         if (current_block == record_end)
 535         {
 536             hit_eof = TRUE;
 537             return NULL;
 538         }
 539     }
 540 
 541     return current_block;
 542 }
 543 
 544 /* --------------------------------------------------------------------------------------------- */
 545 
 546 /**
 547  * Indicate that we have used all blocks up thru @block.
 548  */
 549 gboolean
 550 tar_set_next_block_after (union block *block)
     /* [previous][next][first][last][top][bottom][index][help]  */
 551 {
 552     while (block >= current_block)
 553         current_block++;
 554 
 555     /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
 556        could mean the next block (if the input record is exactly one block long), which is not
 557        what is intended.  */
 558 
 559     return !(current_block > record_end);
 560 }
 561 
 562 /* --------------------------------------------------------------------------------------------- */
 563 
 564 /**
 565  * Compute and return the block ordinal at current_block.
 566  */
 567 off_t
 568 tar_current_block_ordinal (const tar_super_t *archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 569 {
 570     return record_start_block + (current_block - archive->record_start);
 571 }
 572 
 573 /* --------------------------------------------------------------------------------------------- */
 574 
 575 /**
 576  * Skip over @size bytes of data in blocks in the archive.
 577  */
 578 gboolean
 579 tar_skip_file (tar_super_t *archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 580 {
 581     union block *x;
 582     off_t nblk;
 583 
 584     nblk = tar_seek_archive (archive, size);
 585     if (nblk >= 0)
 586         size -= nblk * BLOCKSIZE;
 587 
 588     while (size > 0)
 589     {
 590         x = tar_find_next_block (archive);
 591         if (x == NULL)
 592             return FALSE;
 593 
 594         tar_set_next_block_after (x);
 595         size -= BLOCKSIZE;
 596     }
 597 
 598     return TRUE;
 599 }
 600 
 601 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */