1 /*
2 Virtual File System: GNU Tar file system.
3
4 Copyright (C) 2023-2025
5 Free Software Foundation, Inc.
6
7 Written by:
8 Andrew Borodin <aborodin@vmail.ru>, 2023
9
10 This file is part of the Midnight Commander.
11
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
16
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <https://www.gnu.org/licenses/>.
24 */
25
26 /**
27 * \file
28 * \brief Source: Virtual File System: GNU Tar file system
29 * \author Andrew Borodin
30 * \date 2022
31 */
32
33 #include <config.h>
34
35 #include <ctype.h> // isdigit()
36 #include <inttypes.h> // uintmax_t
37
38 #include "lib/global.h"
39 #include "lib/widget.h" // message()
40 #include "lib/vfs/vfs.h" // mc_read()
41
42 #include "tar-internal.h"
43
44 /*** global variables ****************************************************************************/
45
46 /*** file scope macro definitions ****************************************************************/
47
48 #ifndef UINTMAX_WIDTH
49 #define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
50 #endif
51
52 /* Log base 2 of common values. */
53 #define LG_256 8
54
55 /*** file scope type declarations ****************************************************************/
56
57 /*** forward declarations (file scope functions) *************************************************/
58
59 /*** file scope variables ************************************************************************/
60
/* Lookup table for base-64 digits, indexed by unsigned char.
   Each base-64 digit (see Internet RFC 2045, Table 1) maps to its digit value plus one.
   Every other entry is zero, which marks a character that is not a base-64 digit. */
static const char base64_map[UCHAR_MAX + 1] = {
    // 'A'..'Z': digit values 0..25
    ['A'] = 1,
    ['B'] = 2,
    ['C'] = 3,
    ['D'] = 4,
    ['E'] = 5,
    ['F'] = 6,
    ['G'] = 7,
    ['H'] = 8,
    ['I'] = 9,
    ['J'] = 10,
    ['K'] = 11,
    ['L'] = 12,
    ['M'] = 13,
    ['N'] = 14,
    ['O'] = 15,
    ['P'] = 16,
    ['Q'] = 17,
    ['R'] = 18,
    ['S'] = 19,
    ['T'] = 20,
    ['U'] = 21,
    ['V'] = 22,
    ['W'] = 23,
    ['X'] = 24,
    ['Y'] = 25,
    ['Z'] = 26,
    // 'a'..'z': digit values 26..51
    ['a'] = 27,
    ['b'] = 28,
    ['c'] = 29,
    ['d'] = 30,
    ['e'] = 31,
    ['f'] = 32,
    ['g'] = 33,
    ['h'] = 34,
    ['i'] = 35,
    ['j'] = 36,
    ['k'] = 37,
    ['l'] = 38,
    ['m'] = 39,
    ['n'] = 40,
    ['o'] = 41,
    ['p'] = 42,
    ['q'] = 43,
    ['r'] = 44,
    ['s'] = 45,
    ['t'] = 46,
    ['u'] = 47,
    ['v'] = 48,
    ['w'] = 49,
    ['x'] = 50,
    ['y'] = 51,
    ['z'] = 52,
    // '0'..'9': digit values 52..61
    ['0'] = 53,
    ['1'] = 54,
    ['2'] = 55,
    ['3'] = 56,
    ['4'] = 57,
    ['5'] = 58,
    ['6'] = 59,
    ['7'] = 60,
    ['8'] = 61,
    ['9'] = 62,
    // '+' and '/': digit values 62 and 63
    ['+'] = 63,
    ['/'] = 64,
};
77
78 /* --------------------------------------------------------------------------------------------- */
79 /*** file scope functions ************************************************************************/
80 /* --------------------------------------------------------------------------------------------- */
81
82 static gboolean
83 tar_short_read (size_t status, tar_super_t *archive)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
84 {
85 size_t left; // bytes left
86 char *more; // pointer to next byte to read
87
88 left = record_size - status;
89 more = (char *) archive->record_start + status;
90
91 while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
92 {
93 if (status != 0)
94 {
95 ssize_t r;
96
97 r = mc_read (archive->fd, more, left);
98 if (r == -1)
99 return FALSE;
100
101 status = (size_t) r;
102 }
103
104 if (status == 0)
105 break;
106
107 left -= status;
108 more += status;
109 }
110
111 record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
112
113 return TRUE;
114 }
115
116 /* --------------------------------------------------------------------------------------------- */
117
118 static gboolean
119 tar_flush_read (tar_super_t *archive)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
120 {
121 size_t status;
122
123 status = mc_read (archive->fd, archive->record_start->buffer, record_size);
124 if ((idx_t) status == record_size)
125 return TRUE;
126
127 return tar_short_read (status, archive);
128 }
129
130 /* --------------------------------------------------------------------------------------------- */
131
132 /** Flush the current buffer from the archive.
133 */
134 static gboolean
135 tar_flush_archive (tar_super_t *archive)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
136 {
137 record_start_block += record_end - archive->record_start;
138 current_block = archive->record_start;
139 record_end = archive->record_start + blocking_factor;
140
141 return tar_flush_read (archive);
142 }
143
144 /* --------------------------------------------------------------------------------------------- */
145
146 static off_t
147 tar_seek_archive (tar_super_t *archive, off_t size)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
148 {
149 off_t start, offset;
150 off_t nrec, nblk;
151 off_t skipped;
152
153 // If low level I/O is already at EOF, do not try to seek further.
154 if (record_end < archive->record_start + blocking_factor)
155 return 0;
156
157 skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
158 if (size <= skipped)
159 return 0;
160
161 // Compute number of records to skip
162 nrec = (size - skipped) / record_size;
163 if (nrec == 0)
164 return 0;
165
166 start = tar_current_block_ordinal (archive);
167
168 offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
169 if (offset < 0)
170 return offset;
171
172 #if 0
173 if ((offset % record_size) != 0)
174 {
175 message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
176 return -1;
177 }
178 #endif
179
180 // Convert to number of records
181 offset /= BLOCKSIZE;
182 // Compute number of skipped blocks
183 nblk = offset - start;
184
185 // Update buffering info
186 record_start_block = offset - blocking_factor;
187 current_block = record_end;
188
189 return nblk;
190 }
191
192 /* --------------------------------------------------------------------------------------------- */
193 /*** public functions ****************************************************************************/
194 /* --------------------------------------------------------------------------------------------- */
195
196 gboolean
197 is_octal_digit (char c)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
198 {
199 return '0' <= c && c <= '7';
200 }
201
202 /* --------------------------------------------------------------------------------------------- */
203
/**
 * Replace the string held in *@string with @value.
 *
 * The previous string is released with g_free(). @value is stored as is (not copied).
 *
 * @param string location of the string pointer to update
 * @param value  new pointer to store
 */
void
tar_assign_string (char **string, char *value)
{
    char *previous = *string;

    *string = value;
    g_free (previous);
}
210
211 /* --------------------------------------------------------------------------------------------- */
212
/**
 * Replace the string held in *@string with a newly allocated copy of @value.
 *
 * The previous string is released with g_free().
 *
 * @param string location of the string pointer to update
 * @param value  string to duplicate with g_strdup()
 */
void
tar_assign_string_dup (char **string, const char *value)
{
    char *copy;

    /* Duplicate before freeing: @value may point at (or into) the string currently held
       in *@string, and reading it after g_free() would be a use-after-free. */
    copy = g_strdup (value);
    g_free (*string);
    *string = copy;
}
219
220 /* --------------------------------------------------------------------------------------------- */
221
/**
 * Replace the string held in *@string with a newly allocated copy of a prefix of @value.
 *
 * The previous string is released with g_free().
 *
 * @param string location of the string pointer to update
 * @param value  string to duplicate with g_strndup()
 * @param n      length limit passed to g_strndup()
 */
void
tar_assign_string_dup_n (char **string, const char *value, size_t n)
{
    char *copy;

    /* Duplicate before freeing: @value may point at (or into) the string currently held
       in *@string, and reading it after g_free() would be a use-after-free. */
    copy = g_strndup (value, n);
    g_free (*string);
    *string = copy;
}
228
229 /* --------------------------------------------------------------------------------------------- */
230
231 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
232 to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
233 the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
234 value. @minval must not be positive.
235
236 If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
237 are assumed to be represented using leading '-' in the usual way. If the represented value
238 exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
239 value.
240
241 On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
242 then return 0.
243
244 Sample call to this function:
245
246 char *s_end;
247 gboolean overflow;
248 idx_t i;
249
250 i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
251 if ((s_end == s) | (s_end == '\0') | overflow)
252 diagnose_invalid (s);
253
254 This example uses "|" instead of "||" for fewer branches at runtime,
255 which tends to be more efficient on modern processors.
256
257 This function is named "stoint" instead of "strtoint" because
258 <string.h> reserves names beginning with "str".
259 */
260 #if !(INTMAX_MAX <= UINTMAX_MAX)
261 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
262 #endif
263 intmax_t
264 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
265 {
266 char const *p = arg;
267 intmax_t i;
268 int v = 0;
269
270 if (isdigit (*p))
271 {
272 if (minval <= 0)
273 {
274 i = *p - '0';
275
276 while (isdigit (*++p) != 0)
277 {
278 v |= ckd_mul (&i, i, 10) ? 1 : 0;
279 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
280 }
281
282 v |= maxval < (uintmax_t) i ? 1 : 0;
283 if (v != 0)
284 i = maxval;
285 }
286 else
287 {
288 uintmax_t u = *p - '0';
289
290 while (isdigit (*++p) != 0)
291 {
292 v |= ckd_mul (&u, u, 10) ? 1 : 0;
293 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
294 }
295
296 v |= maxval < u ? 1 : 0;
297 if (v != 0)
298 u = maxval;
299 i = tar_represent_uintmax (u);
300 }
301 }
302 else if (minval < 0 && *p == '-' && isdigit (p[1]))
303 {
304 p++;
305 i = -(*p - '0');
306
307 while (isdigit (*++p) != 0)
308 {
309 v |= ckd_mul (&i, i, 10) ? 1 : 0;
310 v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
311 }
312
313 v |= i < minval ? 1 : 0;
314 if (v != 0)
315 i = minval;
316 }
317 else
318 i = 0;
319
320 if (arglim != NULL)
321 *arglim = (char *) p;
322 if (overflow != NULL)
323 *overflow = v != 0;
324 return i;
325 }
326
327 /* --------------------------------------------------------------------------------------------- */
328
329 /**
330 * Convert buffer at @where0 of size @digs from external format to intmax_t.
331 * @digs must be positive.
332 * If @type is non-NULL, data are of type @type.
333 * The buffer must represent a value in the range -@minval through @maxval;
334 * if the mathematically correct result V would be greater than INTMAX_MAX,
335 * return a negative integer V such that (uintmax_t) V yields the correct result.
336 * If @octal_only, allow only octal numbers instead of the other GNU extensions.
337 *
338 * Result is -1 if the field is invalid.
339 */
340 #if !(INTMAX_MAX <= UINTMAX_MAX && -(INTMAX_MIN + 1) <= UINTMAX_MAX)
341 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
342 #endif
343 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
344 #error "tar_from_header() returns intmax_t to represent uintmax_t"
345 #endif
346 intmax_t
347 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
348 uintmax_t maxval, gboolean octal_only)
349 {
350 uintmax_t value = 0;
351 uintmax_t uminval = minval;
352 uintmax_t minus_minval = -uminval;
353 const char *where = where0;
354 char const *lim = where + digs;
355 gboolean negative = FALSE;
356
357 /* Accommodate buggy tar of unknown vintage, which outputs leading
358 NUL if the previous field overflows. */
359 if (*where == '\0')
360 where++;
361
362 // Accommodate older tars, which output leading spaces.
363 while (TRUE)
364 {
365 if (where == lim)
366 return (-1);
367
368 if (!g_ascii_isspace (*where))
369 break;
370
371 where++;
372 }
373
374 if (is_octal_digit (*where))
375 {
376 char const *where1 = where;
377 gboolean overflow = FALSE;
378
379 while (TRUE)
380 {
381 value += *where++ - '0';
382 if (where == lim || !is_octal_digit (*where))
383 break;
384 overflow |= ckd_mul (&value, value, 8);
385 }
386
387 /* Parse the output of older, unportable tars, which generate
388 negative values in two's complement octal. If the leading
389 nonzero digit is 1, we can't recover the original value
390 reliably; so do this only if the digit is 2 or more. This
391 catches the common case of 32-bit negative time stamps. */
392 if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
393 {
394 // Compute the negative of the input value, assuming two's complement.
395 int digit;
396
397 digit = (*where1 - '0') | 4;
398 overflow = FALSE;
399 value = 0;
400 where = where1;
401
402 while (TRUE)
403 {
404 value += 7 - digit;
405 where++;
406 if (where == lim || !is_octal_digit (*where))
407 break;
408 digit = *where - '0';
409 overflow |= ckd_mul (&value, value, 8);
410 }
411
412 overflow |= ckd_add (&value, value, 1);
413
414 if (!overflow && value <= minus_minval)
415 negative = TRUE;
416 }
417
418 if (overflow)
419 return (-1);
420 }
421 else if (octal_only)
422 {
423 // Suppress the following extensions.
424 }
425 else if (*where == '-' || *where == '+')
426 {
427 /* Parse base-64 output produced only by tar test versions
428 1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
429 Support for this will be withdrawn in future tar releases. */
430
431 negative = *where++ == '-';
432
433 while (where != lim)
434 {
435 unsigned char uc = *where;
436 char dig;
437
438 dig = base64_map[uc];
439 if (dig <= 0)
440 break;
441
442 if (ckd_mul (&value, value, 64))
443 return (-1);
444 value |= dig - 1;
445 where++;
446 }
447 }
448 else if (where <= lim - 2
449 && (*where == '\200' // positive base-256
450 || *where == '\377' /* negative base-256 */))
451 {
452 /* Parse base-256 output. A nonnegative number N is
453 represented as (256**DIGS)/2 + N; a negative number -N is
454 represented as (256**DIGS) - N, i.e. as two's complement.
455 The representation guarantees that the leading bit is
456 always on, so that we don't confuse this format with the
457 others (assuming ASCII bytes of 8 bits or more). */
458
459 int signbit;
460 uintmax_t topbits;
461
462 signbit = *where & (1 << (LG_256 - 2));
463 topbits = ((uintmax_t) -signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
464
465 value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
466
467 while (TRUE)
468 {
469 unsigned char uc;
470
471 uc = *where++;
472 value = (value << LG_256) + uc;
473 if (where == lim)
474 break;
475
476 if (((value << LG_256 >> LG_256) | topbits) != value)
477 return (-1);
478 }
479
480 negative = signbit != 0;
481 if (negative)
482 value = -value;
483 }
484
485 if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
486 return (-1);
487
488 if (value <= (negative ? minus_minval : maxval))
489 return tar_represent_uintmax (negative ? -value : value);
490
491 return (-1);
492 }
493
494 /* --------------------------------------------------------------------------------------------- */
495
496 off_t
497 off_from_header (const char *p, size_t s)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
498 {
499 /* Negative offsets are not allowed in tar files, so invoke
500 from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
501 return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
502 }
503
504 /* --------------------------------------------------------------------------------------------- */
505
506 /**
507 * Return the location of the next available input or output block.
508 * Return NULL for EOF.
509 */
510 union block *
511 tar_find_next_block (tar_super_t *archive)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
512 {
513 if (current_block == record_end)
514 {
515 if (hit_eof)
516 return NULL;
517
518 if (!tar_flush_archive (archive))
519 {
520 message (D_ERROR, MSG_ERROR, _ ("Inconsistent tar archive"));
521 return NULL;
522 }
523
524 if (current_block == record_end)
525 {
526 hit_eof = TRUE;
527 return NULL;
528 }
529 }
530
531 return current_block;
532 }
533
534 /* --------------------------------------------------------------------------------------------- */
535
536 /**
537 * Indicate that we have used all blocks up thru @block.
538 */
539 gboolean
540 tar_set_next_block_after (union block *block)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
541 {
542 while (block >= current_block)
543 current_block++;
544
545 /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
546 could mean the next block (if the input record is exactly one block long), which is not
547 what is intended. */
548
549 return !(current_block > record_end);
550 }
551
552 /* --------------------------------------------------------------------------------------------- */
553
554 /**
555 * Compute and return the block ordinal at current_block.
556 */
557 off_t
558 tar_current_block_ordinal (const tar_super_t *archive)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
559 {
560 return record_start_block + (current_block - archive->record_start);
561 }
562
563 /* --------------------------------------------------------------------------------------------- */
564
565 /**
566 * Skip over @size bytes of data in blocks in the archive.
567 */
568 gboolean
569 tar_skip_file (tar_super_t *archive, off_t size)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
570 {
571 union block *x;
572 off_t nblk;
573
574 nblk = tar_seek_archive (archive, size);
575 if (nblk >= 0)
576 size -= nblk * BLOCKSIZE;
577
578 while (size > 0)
579 {
580 x = tar_find_next_block (archive);
581 if (x == NULL)
582 return FALSE;
583
584 tar_set_next_block_after (x);
585 size -= BLOCKSIZE;
586 }
587
588 return TRUE;
589 }
590
591 /* --------------------------------------------------------------------------------------------- */