root/src/vfs/tar/tar-internal.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. tar_short_read
  2. tar_flush_read
  3. tar_flush_archive
  4. tar_seek_archive
  5. is_octal_digit
  6. tar_base64_init
  7. tar_assign_string
  8. tar_assign_string_dup
  9. tar_assign_string_dup_n
  10. tar_from_header
  11. off_from_header
  12. tar_find_next_block
  13. tar_set_next_block_after
  14. tar_current_block_ordinal
  15. tar_skip_file

   1 /*
   2    Virtual File System: GNU Tar file system.
   3 
   4    Copyright (C) 2023-2024
   5    Free Software Foundation, Inc.
   6 
   7    Written by:
   8    Andrew Borodin <aborodin@vmail.ru>, 2023
   9 
  10    This file is part of the Midnight Commander.
  11 
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16 
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21 
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25 
  26 /**
  27  * \file
  28  * \brief Source: Virtual File System: GNU Tar file system
  29  * \author Andrew Borodin
  30  * \date 2022
  31  */
  32 
  33 #include <config.h>
  34 
  35 #include <inttypes.h>           /* uintmax_t */
  36 #include <stdint.h>             /* UINTMAX_MAX, etc */
  37 
  38 #include "lib/global.h"
  39 #include "lib/widget.h"         /* message() */
  40 #include "lib/vfs/vfs.h"        /* mc_read() */
  41 
  42 #include "tar-internal.h"
  43 
  44 /*** global variables ****************************************************************************/
  45 
  46 /*** file scope macro definitions ****************************************************************/
  47 
  48 /* Log base 2 of common values. */
  49 #define LG_8 3
  50 #define LG_64 6
  51 #define LG_256 8
  52 
  53 /*** file scope type declarations ****************************************************************/
  54 
  55 /*** forward declarations (file scope functions) *************************************************/
  56 
  57 /*** file scope variables ************************************************************************/
  58 
  59 /* Base 64 digits; see RFC 2045 Table 1.  */
  60 static char const base_64_digits[64] = {
  61     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  62     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  63     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  64     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  65     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
  66 };
  67 
  68 /* Table of base 64 digit values indexed by unsigned chars.
  69    The value is 64 for unsigned chars that are not base 64 digits. */
  70 static char base64_map[1 + (unsigned char) (-1)];
  71 
  72 /* --------------------------------------------------------------------------------------------- */
  73 /*** file scope functions ************************************************************************/
  74 /* --------------------------------------------------------------------------------------------- */
  75 
  76 static gboolean
  77 tar_short_read (size_t status, tar_super_t * archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
  78 {
  79     size_t left;                /* bytes left */
  80     char *more;                 /* pointer to next byte to read */
  81 
  82     more = archive->record_start->buffer + status;
  83     left = record_size - status;
  84 
  85     while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
  86     {
  87         if (status != 0)
  88         {
  89             ssize_t r;
  90 
  91             r = mc_read (archive->fd, more, left);
  92             if (r == -1)
  93                 return FALSE;
  94 
  95             status = (size_t) r;
  96         }
  97 
  98         if (status == 0)
  99             break;
 100 
 101         left -= status;
 102         more += status;
 103     }
 104 
 105     record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
 106 
 107     return TRUE;
 108 }
 109 
 110 /* --------------------------------------------------------------------------------------------- */
 111 
 112 static gboolean
 113 tar_flush_read (tar_super_t * archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 114 {
 115     size_t status;
 116 
 117     status = mc_read (archive->fd, archive->record_start->buffer, record_size);
 118     if (status == record_size)
 119         return TRUE;
 120 
 121     return tar_short_read (status, archive);
 122 }
 123 
 124 /* --------------------------------------------------------------------------------------------- */
 125 
 126 /**  Flush the current buffer from the archive.
 127  */
 128 static gboolean
 129 tar_flush_archive (tar_super_t * archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 130 {
 131     record_start_block += record_end - archive->record_start;
 132     current_block = archive->record_start;
 133     record_end = archive->record_start + blocking_factor;
 134 
 135     return tar_flush_read (archive);
 136 }
 137 
 138 /* --------------------------------------------------------------------------------------------- */
 139 
 140 static off_t
 141 tar_seek_archive (tar_super_t * archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 142 {
 143     off_t start, offset;
 144     off_t nrec, nblk;
 145     off_t skipped;
 146 
 147     skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
 148     if (size <= skipped)
 149         return 0;
 150 
 151     /* Compute number of records to skip */
 152     nrec = (size - skipped) / record_size;
 153     if (nrec == 0)
 154         return 0;
 155 
 156     start = tar_current_block_ordinal (archive);
 157 
 158     offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
 159     if (offset < 0)
 160         return offset;
 161 
 162 #if 0
 163     if ((offset % record_size) != 0)
 164     {
 165         message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
 166         return -1;
 167     }
 168 #endif
 169 
 170     /* Convert to number of records */
 171     offset /= BLOCKSIZE;
 172     /* Compute number of skipped blocks */
 173     nblk = offset - start;
 174 
 175     /* Update buffering info */
 176     record_start_block = offset - blocking_factor;
 177     current_block = record_end;
 178 
 179     return nblk;
 180 }
 181 
 182 /* --------------------------------------------------------------------------------------------- */
 183 /*** public functions ****************************************************************************/
 184 /* --------------------------------------------------------------------------------------------- */
 185 
 186 gboolean
 187 is_octal_digit (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 188 {
 189     return '0' <= c && c <= '7';
 190 }
 191 
 192 /* --------------------------------------------------------------------------------------------- */
 193 
 194 void
 195 tar_base64_init (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
 196 {
 197     size_t i;
 198 
 199     memset (base64_map, 64, sizeof base64_map);
 200     for (i = 0; i < 64; i++)
 201         base64_map[(int) base_64_digits[i]] = i;
 202 }
 203 
 204 /* --------------------------------------------------------------------------------------------- */
 205 
 206 void
 207 tar_assign_string (char **string, char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 208 {
 209     g_free (*string);
 210     *string = value;
 211 }
 212 
 213 /* --------------------------------------------------------------------------------------------- */
 214 
 215 void
 216 tar_assign_string_dup (char **string, const char *value)
     /* [previous][next][first][last][top][bottom][index][help]  */
 217 {
 218     g_free (*string);
 219     *string = g_strdup (value);
 220 }
 221 
 222 /* --------------------------------------------------------------------------------------------- */
 223 
 224 void
 225 tar_assign_string_dup_n (char **string, const char *value, size_t n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 226 {
 227     g_free (*string);
 228     *string = g_strndup (value, n);
 229 }
 230 
 231 /* --------------------------------------------------------------------------------------------- */
 232 
 233 /**
 234  * Convert buffer at @where0 of size @digs from external format to intmax_t.
 235  * @digs must be positive.
 236  * If @type is non-NULL, data are of type @type.
 237  * The buffer must represent a value in the range -@minval through @maxval;
 238  * if the mathematically correct result V would be greater than INTMAX_MAX,
 239  * return a negative integer V such that (uintmax_t) V yields the correct result.
 240  * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 241  *
 242  * Result is -1 if the field is invalid.
 243  */
 244 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
 245 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
 246 #endif
 247 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
 248 #error "tar_from_header() returns intmax_t to represent uintmax_t"
 249 #endif
 250 intmax_t
 251 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
     /* [previous][next][first][last][top][bottom][index][help]  */
 252                  uintmax_t maxval, gboolean octal_only)
 253 {
 254     uintmax_t value = 0;
 255     uintmax_t uminval = minval;
 256     uintmax_t minus_minval = -uminval;
 257     const char *where = where0;
 258     char const *lim = where + digs;
 259     gboolean negative = FALSE;
 260 
 261     /* Accommodate buggy tar of unknown vintage, which outputs leading
 262        NUL if the previous field overflows. */
 263     if (*where == '\0')
 264         where++;
 265 
 266     /* Accommodate older tars, which output leading spaces. */
 267     while (TRUE)
 268     {
 269         if (where == lim)
 270             return (-1);
 271 
 272         if (!g_ascii_isspace (*where))
 273             break;
 274 
 275         where++;
 276     }
 277 
 278     if (is_octal_digit (*where))
 279     {
 280         char const *where1 = where;
 281         gboolean overflow = FALSE;
 282 
 283         while (TRUE)
 284         {
 285             value += *where++ - '0';
 286             if (where == lim || !is_octal_digit (*where))
 287                 break;
 288             overflow |= value != (value << LG_8 >> LG_8);
 289             value <<= LG_8;
 290         }
 291 
 292         /* Parse the output of older, unportable tars, which generate
 293            negative values in two's complement octal. If the leading
 294            nonzero digit is 1, we can't recover the original value
 295            reliably; so do this only if the digit is 2 or more. This
 296            catches the common case of 32-bit negative time stamps. */
 297         if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
 298         {
 299             /* Compute the negative of the input value, assuming two's complement. */
 300             int digit;
 301 
 302             digit = (*where1 - '0') | 4;
 303             overflow = FALSE;
 304             value = 0;
 305             where = where1;
 306 
 307             while (TRUE)
 308             {
 309                 value += 7 - digit;
 310                 where++;
 311                 if (where == lim || !is_octal_digit (*where))
 312                     break;
 313                 digit = *where - '0';
 314                 overflow |= value != (value << LG_8 >> LG_8);
 315                 value <<= LG_8;
 316             }
 317 
 318             value++;
 319             overflow |= value == 0;
 320 
 321             if (!overflow && value <= minus_minval)
 322                 negative = TRUE;
 323         }
 324 
 325         if (overflow)
 326             return (-1);
 327     }
 328     else if (octal_only)
 329     {
 330         /* Suppress the following extensions. */
 331     }
 332     else if (*where == '-' || *where == '+')
 333     {
 334         /* Parse base-64 output produced only by tar test versions
 335            1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
 336            Support for this will be withdrawn in future tar releases. */
 337         int dig;
 338 
 339         negative = *where++ == '-';
 340 
 341         while (where != lim && (dig = base64_map[(unsigned char) *where]) < 64)
 342         {
 343             if (value << LG_64 >> LG_64 != value)
 344                 return (-1);
 345             value = (value << LG_64) | dig;
 346             where++;
 347         }
 348     }
 349     else if (where <= lim - 2 && (*where == '\200'      /* positive base-256 */
 350                                   || *where == '\377' /* negative base-256 */ ))
 351     {
 352         /* Parse base-256 output.  A nonnegative number N is
 353            represented as (256**DIGS)/2 + N; a negative number -N is
 354            represented as (256**DIGS) - N, i.e. as two's complement.
 355            The representation guarantees that the leading bit is
 356            always on, so that we don't confuse this format with the
 357            others (assuming ASCII bytes of 8 bits or more). */
 358 
 359         int signbit;
 360         uintmax_t topbits;
 361 
 362         signbit = *where & (1 << (LG_256 - 2));
 363         topbits =
 364             (((uintmax_t) - signbit) << (CHAR_BIT * sizeof (uintmax_t) - LG_256 - (LG_256 - 2)));
 365 
 366         value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
 367 
 368         while (TRUE)
 369         {
 370             value = (value << LG_256) + (unsigned char) *where++;
 371             if (where == lim)
 372                 break;
 373 
 374             if (((value << LG_256 >> LG_256) | topbits) != value)
 375                 return (-1);
 376         }
 377 
 378         negative = signbit != 0;
 379         if (negative)
 380             value = -value;
 381     }
 382 
 383     if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
 384         return (-1);
 385 
 386     if (value <= (negative ? minus_minval : maxval))
 387         return tar_represent_uintmax (negative ? -value : value);
 388 
 389     return (-1);
 390 }
 391 
 392 /* --------------------------------------------------------------------------------------------- */
 393 
 394 off_t
 395 off_from_header (const char *p, size_t s)
     /* [previous][next][first][last][top][bottom][index][help]  */
 396 {
 397     /* Negative offsets are not allowed in tar files, so invoke
 398        from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
 399     return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
 400 }
 401 
 402 /* --------------------------------------------------------------------------------------------- */
 403 
 404 /**
 405  * Return the location of the next available input or output block.
 406  * Return NULL for EOF.
 407  */
 408 union block *
 409 tar_find_next_block (tar_super_t * archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 410 {
 411     if (current_block == record_end)
 412     {
 413         if (hit_eof)
 414             return NULL;
 415 
 416         if (!tar_flush_archive (archive))
 417         {
 418             message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
 419             return NULL;
 420         }
 421 
 422         if (current_block == record_end)
 423         {
 424             hit_eof = TRUE;
 425             return NULL;
 426         }
 427     }
 428 
 429     return current_block;
 430 }
 431 
 432 /* --------------------------------------------------------------------------------------------- */
 433 
 434 /**
 435  * Indicate that we have used all blocks up thru @block.
 436  */
 437 gboolean
 438 tar_set_next_block_after (union block * block)
     /* [previous][next][first][last][top][bottom][index][help]  */
 439 {
 440     while (block >= current_block)
 441         current_block++;
 442 
 443     /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
 444        could mean the next block (if the input record is exactly one block long), which is not
 445        what is intended.  */
 446 
 447     return !(current_block > record_end);
 448 }
 449 
 450 /* --------------------------------------------------------------------------------------------- */
 451 
 452 /**
 453  * Compute and return the block ordinal at current_block.
 454  */
 455 off_t
 456 tar_current_block_ordinal (const tar_super_t * archive)
     /* [previous][next][first][last][top][bottom][index][help]  */
 457 {
 458     return record_start_block + (current_block - archive->record_start);
 459 }
 460 
 461 /* --------------------------------------------------------------------------------------------- */
 462 
 463 /**
 464  * Skip over @size bytes of data in blocks in the archive.
 465  */
 466 gboolean
 467 tar_skip_file (tar_super_t * archive, off_t size)
     /* [previous][next][first][last][top][bottom][index][help]  */
 468 {
 469     union block *x;
 470     off_t nblk;
 471 
 472     nblk = tar_seek_archive (archive, size);
 473     if (nblk >= 0)
 474         size -= nblk * BLOCKSIZE;
 475 
 476     while (size > 0)
 477     {
 478         x = tar_find_next_block (archive);
 479         if (x == NULL)
 480             return FALSE;
 481 
 482         tar_set_next_block_after (x);
 483         size -= BLOCKSIZE;
 484     }
 485 
 486     return TRUE;
 487 }
 488 
 489 /* --------------------------------------------------------------------------------------------- */

/* [previous][next][first][last][top][bottom][index][help]  */