1 /* 2 Internal file viewer for the Midnight Commander 3 Function for plain view 4 5 Copyright (C) 1994-2025 6 Free Software Foundation, Inc. 7 8 Written by: 9 Miguel de Icaza, 1994, 1995, 1998 10 Janne Kukonlehto, 1994, 1995 11 Jakub Jelinek, 1995 12 Joseph M. Hinkle, 1996 13 Norbert Warmuth, 1997 14 Pavel Machek, 1998 15 Roland Illig <roland.illig@gmx.de>, 2004, 2005 16 Slava Zanko <slavazanko@google.com>, 2009 17 Andrew Borodin <aborodin@vmail.ru>, 2009-2022 18 Ilia Maslakov <il.smind@gmail.com>, 2009 19 Rewritten almost from scratch by: 20 Egmont Koblinger <egmont@gmail.com>, 2014 21 22 This file is part of the Midnight Commander. 23 24 The Midnight Commander is free software: you can redistribute it 25 and/or modify it under the terms of the GNU General Public License as 26 published by the Free Software Foundation, either version 3 of the License, 27 or (at your option) any later version. 28 29 The Midnight Commander is distributed in the hope that it will be useful, 30 but WITHOUT ANY WARRANTY; without even the implied warranty of 31 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 32 GNU General Public License for more details. 33 34 You should have received a copy of the GNU General Public License 35 along with this program. If not, see <https://www.gnu.org/licenses/>. 36 37 ------------------------------------------------------------------------------------------------ 38 39 The viewer is implemented along the following design principles: 40 41 Goals: Always display simple scripts, double wide (CJK), combining accents and spacing marks 42 (often used e.g. in Devanagari) perfectly. Make the arrow keys always work correctly. 43 44 Absolutely non-goal: RTL. 45 46 Terminology: 47 48 - A "paragraph" is the text between two adjacent newline characters. A "line" or "row" is a 49 visual row on the screen. In wrap mode, the viewer formats a paragraph into one or more lines. 
50 51 - The Unicode glossary <https://www.unicode.org/glossary/> doesn't seem to have a notion of "base 52 character followed by zero or more combining characters". The closest matches are "Combining 53 Character Sequence" meaning a base character followed by one or more combining characters, or 54 "Grapheme" which seems to exclude non-printable characters such as newline. In this file, 55 "combining character sequence" (or any obvious abbreviation thereof) means a base character 56 followed by zero or more (up to a current limit of 4) combining characters. 57 58 ------------------------------------------------------------------------------------------------ 59 60 The parser-formatter is designed to be stateless across paragraphs. This is so that we can walk 61 backwards without having to reparse the whole file (although we still need to reparse and 62 reformat the whole paragraph, but it's a lot better). This principle needs to be changed if we 63 ever get to address tickets 1849/2977, but then we can still store (for efficiency) the parser 64 state at the beginning of the paragraph, and safely walk backwards if we don't cross an escape 65 character. 66 67 The parser-formatter, however, definitely needs to carry a state across lines. Currently this 68 state contains: 69 70 - The logical column (as if we didn't wrap). This is used for handling TAB characters after a 71 wordwrap consistently with less. 72 73 - Whether the last nroff character was bold or underlined. This is used for displaying the 74 ambiguous _\b_ sequence consistently with less. 75 76 - Whether the desired way of displaying a lonely combining accent or spacing mark is to place it 77 over a dotted circle (we do this at the beginning of the paragraph or after a TAB), or to ignore 78 the combining char and show replacement char for the spacing mark (we do this if e.g. too many 79 of these were encountered and hence we don't glue them with their base character). 
80 81 - (This state needs to be expanded if e.g. we decide to print verbose replacement characters 82 (e.g. "<U+0080>") and allow these to wrap around lines.) 83 84 The state also contains the file offset, as it doesn't make sense to ever know the state without 85 knowing the corresponding offset. 86 87 The state depends on various settings (viewer width, encoding, nroff mode, charwrap or wordwrap 88 mode (if we'll have that one day) etc.), needs to be recomputed if any of these changes. 89 90 Walking forwards is usually relatively easy both in the file and on the screen. Walking 91 backwards within a paragraph would only be possible in some special cases and even then it would 92 be painful, so we always walk back to the beginning of the paragraph and reparse-reformat from 93 there. 94 95 (Walking back within a line in the file would have at least the following difficulties: handling 96 the parser state; processing invalid UTF-8; processing invalid nroff (e.g. what is "_\bA\bA"?). 97 Walking back on the display: we wouldn't know where to display the last line of a paragraph, or 98 where to display a line if its following line starts with a wide (CJK or Tab) character. Long 99 story short: just forget this approach.) 100 101 Most important variables: 102 103 - dpy_start: Both in unwrap and wrap modes this points to the beginning of the topmost displayed 104 paragraph. 105 106 - dpy_text_column: Only in unwrap mode, an additional horizontal scroll. 107 108 - dpy_paragraph_skip_lines: Only in wrap mode, an additional vertical scroll (the number of 109 lines that are scrolled off at the top from the topmost paragraph). 110 111 - dpy_state_top: Only in wrap mode, the offset and parser-formatter state at the line where 112 displaying the file begins is cached here. 113 114 - dpy_wrap_dirty: If some parameter has changed that makes it necessary to reparse-redisplay the 115 topmost paragraph. 
116 117 In wrap mode, the three variables "dpy_start", "dpy_paragraph_skip_lines" and "dpy_state_top" 118 are kept consistent. Think of the first two as the ones describing the position, and the third 119 as a cached value for better performance so that we don't need to wrap the invisible beginning 120 of the topmost paragraph over and over again. The third value needs to be recomputed each time a 121 parameter that influences parsing or displaying the file (e.g. width of screen, encoding, nroff 122 mode) changes, this is signaled by "dpy_wrap_dirty" to force recomputing "dpy_state_top" (and 123 clamp "dpy_paragraph_skip_lines" if necessary). 124 125 ------------------------------------------------------------------------------------------------ 126 127 Help integration 128 129 I'm planning to port the help viewer to this codebase. 130 131 Splitting at sections would still happen in the help viewer. It would either copy a section, or 132 set force_max and a similar force_min to limit displaying to one section only. 133 134 Parsing the help format would go next to the nroff parser. The colors, alternate character set, 135 and emitting the version number would go to the "state". (The version number would be 136 implemented by emitting remaining characters of a buffer in the "state" one by one, without 137 advancing in the file position.) 138 139 The active link would be drawn similarly to the search highlight. Other than that, the viewer 140 wouldn't care about links (except for their color). help.c would keep track of which one is 141 highlighted, how to advance to the next/prev on an arrow, how the scroll offset needs to be 142 adjusted when moving, etc. 143 144 Add wrapping at word boundaries to where wrapping at char boundaries happens now. 
145 */ 146 147 #include <config.h> 148 149 #include "lib/global.h" 150 #include "lib/tty/tty.h" 151 #include "lib/skin.h" 152 #include "lib/util.h" // is_printable() 153 #include "lib/charsets.h" 154 155 #include "src/setup.h" // option_tab_spacing 156 157 #include "internal.h" 158 159 /*** global variables ****************************************************************************/ 160 161 /*** file scope macro definitions ****************************************************************/ 162 163 /* The Unicode standard recommends that lonely combining characters are printed over a dotted 164 * circle. If the terminal is not UTF-8, this will be replaced by a dot anyway. */ 165 #define BASE_CHARACTER_FOR_LONELY_COMBINING 0x25CC // dotted circle 166 #define MAX_COMBINING_CHARS 4 // both slang and ncurses support exactly 4 167 168 /* I think anything other than space (e.g. arrows) just introduce visual clutter without actually 169 * adding value. */ 170 #define PARTIAL_CJK_AT_LEFT_MARGIN ' ' 171 #define PARTIAL_CJK_AT_RIGHT_MARGIN ' ' 172 173 /* 174 * Wrap mode: This is for safety so that jumping to the end of file (which already includes 175 * scrolling back by a page) and then walking backwards is reasonably fast, even if the file is 176 * extremely large and consists of maybe full zeros or something like that. If there's no newline 177 * found within this limit, just start displaying from there and see what happens. We might get 178 * some displaying parameters (most importantly the columns) incorrect, but at least will show the 179 * file without spinning the CPU for ages. When scrolling back to that point, the user might see a 180 * garbled first line (even starting with an invalid partial UTF-8), but then walking back by yet 181 * another line should fix it. 182 * 183 * Unwrap mode: This is not used, we wouldn't be able to do anything reasonable without walking 184 * back a whole paragraph (well, view->data_area.height paragraphs actually). 
185 */ 186 #define MAX_BACKWARDS_WALK_IN_PARAGRAPH (100 * 1000) 187 188 /*** file scope type declarations ****************************************************************/ 189 190 /*** forward declarations (file scope functions) *************************************************/ 191 192 /*** file scope variables ************************************************************************/ 193 194 /* --------------------------------------------------------------------------------------------- */ 195 /*** file scope functions ************************************************************************/ 196 /* --------------------------------------------------------------------------------------------- */ 197 198 /* TODO: These methods shouldn't be necessary, see ticket 3257 */ 199 200 static int 201 mcview_wcwidth (const WView *view, int c) /**/ 202 { 203 if (view->utf8) 204 { 205 if (g_unichar_iswide (c)) 206 return 2; 207 if (g_unichar_iszerowidth (c)) 208 return 0; 209 } 210 211 return 1; 212 } 213 214 /* --------------------------------------------------------------------------------------------- */ 215 216 static inline gboolean 217 mcview_ismark (const WView *view, int c) /*
*/ 218 { 219 return (view->utf8 && g_unichar_ismark (c)); 220 } 221 222 /* --------------------------------------------------------------------------------------------- */ 223 224 /* actually is_non_spacing_mark_or_enclosing_mark */ 225 static gboolean 226 mcview_is_non_spacing_mark (const WView *view, int c) /*
*/ 227 { 228 if (view->utf8) 229 { 230 const GUnicodeType type = g_unichar_type (c); 231 232 return type == G_UNICODE_NON_SPACING_MARK || type == G_UNICODE_ENCLOSING_MARK; 233 } 234 235 return FALSE; 236 } 237 238 /* --------------------------------------------------------------------------------------------- */ 239 240 #if 0 241 static gboolean 242 mcview_is_spacing_mark (const WView *view, int c) /*
*/ 243 { 244 return (view->utf8 && g_unichar_type (c) == G_UNICODE_SPACING_MARK); 245 } 246 #endif 247 248 /* --------------------------------------------------------------------------------------------- */ 249 250 static gboolean 251 mcview_isprint (const WView *view, int c) /*
*/ 252 { 253 if (!view->utf8) 254 c = convert_from_8bit_to_utf_c ((unsigned char) c, view->converter); 255 return g_unichar_isprint (c); 256 } 257 258 /* --------------------------------------------------------------------------------------------- */ 259 260 static int 261 mcview_char_display (const WView *view, int c, char *s) /*
*/ 262 { 263 if (mc_global.utf8_display) 264 { 265 if (!view->utf8) 266 c = convert_from_8bit_to_utf_c ((unsigned char) c, view->converter); 267 if (!g_unichar_isprint (c)) 268 c = '.'; 269 return g_unichar_to_utf8 (c, s); 270 } 271 if (view->utf8) 272 { 273 if (g_unichar_iswide (c)) 274 { 275 s[0] = s[1] = '.'; 276 return 2; 277 } 278 if (g_unichar_iszerowidth (c)) 279 return 0; 280 // TODO the is_printable check below will be broken for this 281 c = convert_from_utf_to_current_c (c, view->converter); 282 } 283 else 284 { 285 // TODO the is_printable check below will be broken for this 286 c = convert_to_display_c (c); 287 } 288 289 // TODO this is very-very buggy by design: ticket 3257 comments 0-1 290 if (!is_printable (c)) 291 c = '.'; 292 *s = c; 293 return 1; 294 } 295 296 /* --------------------------------------------------------------------------------------------- */ 297 298 /** 299 * Just for convenience, a common interface in front of mcview_get_utf and mcview_get_byte, so that 300 * the caller doesn't have to care about utf8 vs 8-bit modes. 301 * 302 * Normally: stores c, updates state, returns TRUE. 303 * At EOF: state is unchanged, c is undefined, returns FALSE. 304 * 305 * Just as with mcview_get_utf(), invalid UTF-8 is reported using negative integers. 306 * 307 * Also, temporary hack: handle force_max here. 308 * TODO: move it to lower layers (datasource.c)? 309 */ 310 static gboolean 311 mcview_get_next_char (WView *view, mcview_state_machine_t *state, int *c) /*
*/ 312 { 313 // Pretend EOF if we reached force_max 314 if (view->force_max >= 0 && state->offset >= view->force_max) 315 return FALSE; 316 317 if (view->utf8) 318 { 319 int char_length = 0; 320 321 if (!mcview_get_utf (view, state->offset, c, &char_length)) 322 return FALSE; 323 // Pretend EOF if we crossed force_max 324 if (view->force_max >= 0 && state->offset + char_length > view->force_max) 325 return FALSE; 326 327 state->offset += char_length; 328 return TRUE; 329 } 330 331 if (!mcview_get_byte (view, state->offset, c)) 332 return FALSE; 333 state->offset++; 334 return TRUE; 335 } 336 337 /* --------------------------------------------------------------------------------------------- */ 338 /** 339 * This function parses the next nroff character and gives it to you along with its desired color, 340 * so you never have to care about nroff again. 341 * 342 * The nroff mode does the backspace trick for every single character (Unicode codepoint). At least 343 * that's what the GNU groff 1.22 package produces, and that's what less 458 expects. For 344 * double-wide characters (CJK), still only a single backspace is emitted. For combining accents 345 * and such, the print-backspace-print step is repeated for the base character and then for each 346 * accent separately. 347 * 348 * So, the right place for this layer is after the bytes are interpreted in UTF-8, but before 349 * joining a base character with its combining accents. 350 * 351 * Normally: stores c and color, updates state, returns TRUE. 352 * At EOF: state is unchanged, c and color are undefined, returns FALSE. 353 * 354 * color can be null if the caller doesn't care. 355 */ 356 static gboolean 357 mcview_get_next_maybe_nroff_char (WView *view, mcview_state_machine_t *state, int *c, int *color) /*
*/ 358 { 359 mcview_state_machine_t state_after_nroff; 360 int c2, c3; 361 362 if (color != NULL) 363 *color = VIEW_NORMAL_COLOR; 364 365 if (!view->mode_flags.nroff) 366 return mcview_get_next_char (view, state, c); 367 368 if (!mcview_get_next_char (view, state, c)) 369 return FALSE; 370 // Don't allow nroff formatting around CR, LF, TAB or other special chars 371 if (!mcview_isprint (view, *c)) 372 return TRUE; 373 374 state_after_nroff = *state; 375 376 if (!mcview_get_next_char (view, &state_after_nroff, &c2)) 377 return TRUE; 378 if (c2 != '\b') 379 return TRUE; 380 381 if (!mcview_get_next_char (view, &state_after_nroff, &c3)) 382 return TRUE; 383 if (!mcview_isprint (view, c3)) 384 return TRUE; 385 386 if (*c == '_' && c3 == '_') 387 { 388 *state = state_after_nroff; 389 if (color != NULL) 390 *color = 391 state->nroff_underscore_is_underlined ? VIEW_UNDERLINED_COLOR : VIEW_BOLD_COLOR; 392 } 393 else if (*c == c3) 394 { 395 *state = state_after_nroff; 396 state->nroff_underscore_is_underlined = FALSE; 397 if (color != NULL) 398 *color = VIEW_BOLD_COLOR; 399 } 400 else if (*c == '_') 401 { 402 *c = c3; 403 *state = state_after_nroff; 404 state->nroff_underscore_is_underlined = TRUE; 405 if (color != NULL) 406 *color = VIEW_UNDERLINED_COLOR; 407 } 408 409 return TRUE; 410 } 411 412 /* --------------------------------------------------------------------------------------------- */ 413 /** 414 * Get one base character, along with its combining or spacing mark characters. 415 * 416 * (A spacing mark is a character that extends the base character's width 1 into a combined 417 * character of width 2, yet these two character cells should not be separated. E.g. Devanagari 418 * <U+0939><U+094B>.) 419 * 420 * This method exists mainly for two reasons. One is to be able to tell if we fit on the current 421 * line or need to wrap to the next one. 
/**
 * Get one base character, along with its combining or spacing mark characters.
 *
 * (A spacing mark is a character that extends the base character's width 1 into a combined
 * character of width 2, yet these two character cells should not be separated. E.g. Devanagari
 * <U+0939><U+094B>.)
 *
 * This method exists mainly for two reasons. One is to be able to tell if we fit on the current
 * line or need to wrap to the next one. The other is that both slang and ncurses seem to require
 * that the character and its combining marks are printed in a single call (or is it just a
 * limitation of mc's wrapper to them?).
 *
 * For convenience, this method takes care of converting CR or CR+LF into LF.
 * TODO this should probably happen later, when displaying the file?
 *
 * Normally: stores cs and color, updates state, returns >= 1 (entries in cs).
 * At EOF: state is unchanged, cs and color are undefined, returns 0.
 *
 * @param view ...
 * @param state the parser-formatter state machine's state, updated
 * @param cs store the characters here
 * @param clen the room available in cs (that is, at most clen-1 combining marks are allowed), must
 *             be at least 2
 * @param color if non-NULL, store the color here, taken from the first codepoint's color
 * @return the number of entries placed in cs, or 0 on EOF
 */
static int
mcview_next_combining_char_sequence (WView *view, mcview_state_machine_t *state, int *cs, int clen,
                                     int *color)
{
    // Index of the next free slot in cs; slot 0 is filled by the first read below.
    int i = 1;

    if (!mcview_get_next_maybe_nroff_char (view, state, cs, color))
        return 0;

    // Process \r and \r\n newlines.
    if (cs[0] == '\r')
    {
        int cnext;

        // Look ahead on a copy of the state, so that a character other than '\n' stays unread.
        mcview_state_machine_t state_after_crlf = *state;
        if (mcview_get_next_maybe_nroff_char (view, &state_after_crlf, &cnext, NULL)
            && cnext == '\n')
            *state = state_after_crlf;
        cs[0] = '\n';
        return 1;
    }

    // We don't want combining over non-printable characters. This includes '\n' and '\t' too.
    if (!mcview_isprint (view, cs[0]))
        return 1;

    if (mcview_ismark (view, cs[0]))
    {
        if (!state->print_lonely_combining)
        {
            // First character is combining. Either just return it, ...
            return 1;
        }
        else
        {
            // or place this (and subsequent combining ones) over a dotted circle.
            cs[1] = cs[0];
            cs[0] = BASE_CHARACTER_FOR_LONELY_COMBINING;
            i = 2;
        }
    }

    if (mcview_wcwidth (view, cs[0]) == 2)
    {
        // Don't allow combining or spacing mark for wide characters, is this okay?
        return 1;
    }

    /* Look for more combining chars. Either at most clen-1 zero-width combining chars,
     * or at most 1 spacing mark. Is this logic correct? */
    for (; i < clen; i++)
    {
        mcview_state_machine_t state_after_combining;

        // Read tentatively; "state" is only advanced once the character is accepted.
        state_after_combining = *state;
        if (!mcview_get_next_maybe_nroff_char (view, &state_after_combining, &cs[i], NULL))
            return i;
        if (!mcview_ismark (view, cs[i]) || !mcview_isprint (view, cs[i]))
            return i;
        if (g_unichar_type (cs[i]) == G_UNICODE_SPACING_MARK)
        {
            // Only allow as the first combining char. Stop processing in either case.
            if (i == 1)
            {
                *state = state_after_combining;
                i++;
            }
            return i;
        }
        *state = state_after_combining;
    }
    return i;
}

/* --------------------------------------------------------------------------------------------- */
/**
 * Parse, format and possibly display one visual line of text.
 *
 * Formatting starts at the given "state" (which encodes the file offset and parser and formatter's
 * internal state). In unwrap mode, this should point to the beginning of the paragraph with the
 * default state, the additional horizontal scrolling is added here. In wrap mode, this should
 * point to the beginning of the line, with the proper state at that point.
 *
 * In wrap mode, if a line ends in a newline, it is consumed, even if it's exactly at the right
 * edge. In unwrap mode, the whole remaining line, including the newline is consumed. Displaying
 * the next line should start at "state"'s new value, or if we displayed the bottom line then
 * state->offset tells the file offset to be shown in the top bar.
 *
 * If "row" is offscreen, don't actually display the line but still update "state" and return the
 * proper value. This is used by mcview_wrap_move_down to advance in the file.
 *
 * @param view ...
 * @param state the parser-formatter state machine's state, updated
 * @param row print to this row
 * @param paragraph_ended if non-NULL, store TRUE if paragraph ended by newline or EOF, FALSE if
 *                        wraps to next line
 * @param linewidth if non-NULL, store the width of the line here; passing non-NULL also disables
 *                  both fast-forward shortcuts below, so that the whole line is measured
 * @return the number of rows, that is, 0 if we were already at EOF, otherwise 1
 */
static int
mcview_display_line (WView *view, mcview_state_machine_t *state, int row, gboolean *paragraph_ended,
                     off_t *linewidth)
{
    const WRect *r = &view->data_area;
    // The horizontal scroll only applies in unwrap mode.
    off_t dpy_text_column = view->mode_flags.wrap ? 0 : view->dpy_text_column;
    // Column within this visual line, counted from the start of the line (not of the viewport).
    int col = 0;
    int cs[1 + MAX_COMBINING_CHARS];
    char str[(1 + MAX_COMBINING_CHARS) * UTF8_CHAR_LEN + 1];
    int i, j;

    // Default answer; only the wrap branch below changes it to FALSE.
    if (paragraph_ended != NULL)
        *paragraph_ended = TRUE;

    if (!view->mode_flags.wrap && (row < 0 || row >= r->lines) && linewidth == NULL)
    {
        /* Optimization: Fast forward to the end of the line, rather than carefully
         * parsing and then not actually displaying it. */
        off_t eol;
        int retval;

        eol = mcview_eol (view, state->offset);
        // If mcview_eol() did not advance, nothing was left to consume: report 0 rows.
        retval = (eol > state->offset) ? 1 : 0;

        mcview_state_machine_init (state, eol);
        return retval;
    }

    while (TRUE)
    {
        int charwidth = 0;
        mcview_state_machine_t state_saved;
        int n;
        int color;

        // Remember where this sequence started, in case it has to be pushed to the next row.
        state_saved = *state;
        n = mcview_next_combining_char_sequence (view, state, cs, 1 + MAX_COMBINING_CHARS, &color);
        if (n == 0)
        {
            // EOF. It counts as a row only if something was already placed on this line.
            if (linewidth != NULL)
                *linewidth = col;
            return (col > 0) ? 1 : 0;
        }

        // Note: state->offset at this point is the offset just past the sequence that was read.
        if (view->search_start <= state->offset && state->offset < view->search_end)
            color = VIEW_SELECTED_COLOR;

        if (cs[0] == '\n')
        {
            // New line: reset all formatting state for the next paragraph.
            mcview_state_machine_init (state, state->offset);
            if (linewidth != NULL)
                *linewidth = col;
            return 1;
        }

        if (mcview_is_non_spacing_mark (view, cs[0]))
        {
            // Lonely combining character. Probably leftover after too many combining chars. Just
            // ignore.
            continue;
        }

        // Nonprintable, or lonely spacing mark
        if ((!mcview_isprint (view, cs[0]) || mcview_ismark (view, cs[0])) && cs[0] != '\t')
            cs[0] = '.';

        for (i = 0; i < n; i++)
            charwidth += mcview_wcwidth (view, cs[i]);

        /* Adjust the width for TAB. It's handled below along with the normal characters,
         * so that it's wrapped consistently with them, and is painted with the proper
         * attributes (although currently it can't have a special color). */
        if (cs[0] == '\t')
        {
            // Tab stops are computed from the unwrapped (logical) column, not from "col".
            charwidth = option_tab_spacing - state->unwrapped_column % option_tab_spacing;
            state->print_lonely_combining = TRUE;
        }
        else
            state->print_lonely_combining = FALSE;

        /* In wrap mode only: We're done with this row if the character sequence wouldn't fit.
         * Except if at the first column, because then it wouldn't fit in the next row either.
         * In this extreme case let the unwrapped code below do its best to display it. */
        if (view->mode_flags.wrap && (off_t) col + charwidth > dpy_text_column + (off_t) r->cols
            && col > 0)
        {
            // Un-read the sequence so that the next row starts with it.
            *state = state_saved;
            if (paragraph_ended != NULL)
                *paragraph_ended = FALSE;
            if (linewidth != NULL)
                *linewidth = col;
            return 1;
        }

        // Display, unless outside of the viewport.
        if (row >= 0 && row < r->lines)
        {
            if ((off_t) col >= dpy_text_column
                && (off_t) col + charwidth <= dpy_text_column + (off_t) r->cols)
            {
                // The combining character sequence fits entirely in the viewport. Print it.
                tty_setcolor (color);
                widget_gotoyx (view, r->y + row, r->x + ((off_t) col - dpy_text_column));
                if (cs[0] == '\t')
                {
                    for (i = 0; i < charwidth; i++)
                        tty_print_char (' ');
                }
                else
                {
                    // Concatenate the display bytes of all codepoints of the sequence.
                    j = 0;
                    for (i = 0; i < n; i++)
                        j += mcview_char_display (view, cs[i], str + j);
                    str[j] = '\0';
                    /* This is probably a bug in our tty layer, but tty_print_string
                     * normalizes the string, whereas tty_printf doesn't. Don't normalize,
                     * since we handle combining characters ourselves correctly, it's
                     * better if they are copy-pasted correctly. Ticket 3255. */
                    tty_printf ("%s", str);
                }
            }
            else if ((off_t) col < dpy_text_column && (off_t) col + charwidth > dpy_text_column)
            {
                /* The combining character sequence would cross the left edge of the viewport.
                 * This cannot happen with wrap mode. Print replacement character(s),
                 * or spaces with the correct attributes for partial Tabs. */
                tty_setcolor (color);
                for (i = dpy_text_column;
                     i < (off_t) col + charwidth && i < dpy_text_column + (off_t) r->cols; i++)
                {
                    widget_gotoyx (view, r->y + row, r->x + (i - dpy_text_column));
                    tty_print_anychar ((cs[0] == '\t') ? ' ' : PARTIAL_CJK_AT_LEFT_MARGIN);
                }
            }
            else if ((off_t) col < dpy_text_column + (off_t) r->cols
                     && (off_t) col + charwidth > dpy_text_column + (off_t) r->cols)
            {
                /* The combining character sequence would cross the right edge of the viewport
                 * and we're not wrapping. Print replacement character(s),
                 * or spaces with the correct attributes for partial Tabs. */
                tty_setcolor (color);
                for (i = col; i < dpy_text_column + (off_t) r->cols; i++)
                {
                    widget_gotoyx (view, r->y + row, r->x + (i - dpy_text_column));
                    tty_print_anychar ((cs[0] == '\t') ? ' ' : PARTIAL_CJK_AT_RIGHT_MARGIN);
                }
            }
        }

        col += charwidth;
        state->unwrapped_column += charwidth;

        if (!view->mode_flags.wrap && (off_t) col >= dpy_text_column + (off_t) r->cols
            && linewidth == NULL)
        {
            /* Optimization: Fast forward to the end of the line, rather than carefully
             * parsing and then not actually displaying it. */
            off_t eol;

            eol = mcview_eol (view, state->offset);
            mcview_state_machine_init (state, eol);
            return 1;
        }
    }
}

/* --------------------------------------------------------------------------------------------- */
708 * 709 * Formatting starts at the given "state" (which encodes the file offset and parser and formatter's 710 * internal state). In unwrap mode, this should point to the beginning of the paragraph with the 711 * default state, the additional horizontal scrolling is added here. In wrap mode, this may point 712 * to the beginning of the line within a paragraph (to display the partial paragraph at the top), 713 * with the proper state at that point. 714 * 715 * Displaying the next paragraph should start at "state"'s new value, or if we displayed the bottom 716 * line then state->offset tells the file offset to be shown in the top bar. 717 * 718 * If "row" is negative, don't display the first abs(row) lines and display the rest from the top. 719 * This was a nice idea but it's now unused :) 720 * 721 * If "row" is too large, don't display the paragraph at all but still return the number of lines. 722 * This is used when moving upwards. 723 * 724 * @param view ... 725 * @param state the parser-formatter state machine's state, updated 726 * @param row print starting at this row 727 * @return the number of rows the paragraphs is wrapped to, that is, 0 if we were already at EOF, 728 * otherwise 1 in unwrap mode, >= 1 in wrap mode. We stop when reaching the bottom of the 729 * viewport, it's not counted how many more lines the paragraph would occupy 730 */ 731 static int 732 mcview_display_paragraph (WView *view, mcview_state_machine_t *state, int row) /*
*/ 733 { 734 int lines = 0; 735 736 while (TRUE) 737 { 738 gboolean paragraph_ended; 739 740 lines += mcview_display_line (view, state, row, ¶graph_ended, NULL); 741 if (paragraph_ended) 742 return lines; 743 744 if (row < view->data_area.lines) 745 { 746 row++; 747 // stop if bottom of screen reached 748 if (row >= view->data_area.lines) 749 return lines; 750 } 751 } 752 } 753 754 /* --------------------------------------------------------------------------------------------- */ 755 /** 756 * Recompute dpy_state_top from dpy_start and dpy_paragraph_skip_lines. Clamp 757 * dpy_paragraph_skip_lines if necessary. 758 * 759 * This method should be called in wrap mode after changing one of the parsing or formatting 760 * properties (e.g. window width, encoding, nroff), or when switching to wrap mode from unwrap or 761 * hex. 762 * 763 * If we stayed within the same paragraph then try to keep the vertical offset within that 764 * paragraph as well. It might happen though that the paragraph became shorter than our desired 765 * vertical position, in that case move to its last row. 766 */ 767 static void 768 mcview_wrap_fixup (WView *view) /*
*/ 769 { 770 int lines = view->dpy_paragraph_skip_lines; 771 772 if (!view->dpy_wrap_dirty) 773 return; 774 view->dpy_wrap_dirty = FALSE; 775 776 view->dpy_paragraph_skip_lines = 0; 777 mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); 778 779 while (lines-- != 0) 780 { 781 mcview_state_machine_t state_prev; 782 gboolean paragraph_ended; 783 784 state_prev = view->dpy_state_top; 785 if (mcview_display_line (view, &view->dpy_state_top, -1, ¶graph_ended, NULL) == 0) 786 break; 787 if (paragraph_ended) 788 { 789 view->dpy_state_top = state_prev; 790 break; 791 } 792 view->dpy_paragraph_skip_lines++; 793 } 794 } 795 796 /* --------------------------------------------------------------------------------------------- */ 797 /*** public functions ****************************************************************************/ 798 /* --------------------------------------------------------------------------------------------- */ 799 800 /** 801 * In both wrap and unwrap modes, dpy_start points to the beginning of the paragraph. 802 * 803 * In unwrap mode, start displaying from this position, probably applying an additional horizontal 804 * scroll. 805 * 806 * In wrap mode, an additional dpy_paragraph_skip_lines lines are skipped from the top of this 807 * paragraph. dpy_state_top contains the position and parser-formatter state corresponding to the 808 * top left corner so we can just start rendering from here. Unless dpy_wrap_dirty is set in which 809 * case dpy_state_top is invalid and we need to recompute first. 810 */ 811 void 812 mcview_display_text (WView *view) /*
*/ 813 { 814 const WRect *r = &view->data_area; 815 int row; 816 mcview_state_machine_t state; 817 gboolean again; 818 819 do 820 { 821 int n; 822 823 again = FALSE; 824 825 mcview_display_clean (view); 826 mcview_display_ruler (view); 827 828 if (!view->mode_flags.wrap) 829 mcview_state_machine_init (&state, view->dpy_start); 830 else 831 { 832 mcview_wrap_fixup (view); 833 state = view->dpy_state_top; 834 } 835 836 for (row = 0; row < r->lines; row += n) 837 { 838 n = mcview_display_paragraph (view, &state, row); 839 if (n == 0) 840 { 841 /* In the rare case that displaying didn't start at the beginning 842 * of the file, yet there are some empty lines at the bottom, 843 * scroll the file and display again. This happens when e.g. the 844 * window is made bigger, or the file becomes shorter due to 845 * charset change or enabling nroff. */ 846 if ((view->mode_flags.wrap ? view->dpy_state_top.offset : view->dpy_start) > 0) 847 { 848 mcview_ascii_move_up (view, r->lines - row); 849 again = TRUE; 850 } 851 break; 852 } 853 } 854 } 855 while (again); 856 857 view->dpy_end = state.offset; 858 view->dpy_state_bottom = state; 859 860 tty_setcolor (VIEW_NORMAL_COLOR); 861 if (mcview_show_eof != NULL && mcview_show_eof[0] != '\0') 862 while (row < r->lines) 863 { 864 widget_gotoyx (view, r->y + row, r->x); 865 // TODO: should make it no wider than the viewport 866 tty_print_string (mcview_show_eof); 867 row++; 868 } 869 } 870 871 /* --------------------------------------------------------------------------------------------- */ 872 /** 873 * Move down. 874 * 875 * It's very simple. Just invisibly format the next "lines" lines, carefully carrying the formatter 876 * state in wrap mode. But before each step we need to check if we've already hit the end of the 877 * file, in that case we can no longer move. This is done by walking from dpy_state_bottom. 
878 * 879 * Note that this relies on mcview_display_text() setting dpy_state_bottom to its correct value 880 * upon rendering the screen contents. So don't call this function from other functions (e.g. at 881 * the bottom of mcview_ascii_move_up()) which invalidate this value. 882 */ 883 void 884 mcview_ascii_move_down (WView *view, off_t lines) /*
*/ 885 { 886 while (lines-- != 0) 887 { 888 gboolean paragraph_ended; 889 890 /* See if there's still data below the bottom line, by imaginarily displaying one 891 * more line. This takes care of reading more data into growbuf, if required. 892 * If the end position didn't advance, we're at EOF and hence bail out. */ 893 if (mcview_display_line (view, &view->dpy_state_bottom, -1, ¶graph_ended, NULL) == 0) 894 break; 895 896 /* Okay, there's enough data. Move by 1 row at the top, too. No need to check for 897 * EOF, that can't happen. */ 898 if (!view->mode_flags.wrap) 899 { 900 view->dpy_start = mcview_eol (view, view->dpy_start); 901 view->dpy_paragraph_skip_lines = 0; 902 view->dpy_wrap_dirty = TRUE; 903 } 904 else 905 { 906 mcview_display_line (view, &view->dpy_state_top, -1, ¶graph_ended, NULL); 907 if (!paragraph_ended) 908 view->dpy_paragraph_skip_lines++; 909 else 910 { 911 view->dpy_start = view->dpy_state_top.offset; 912 view->dpy_paragraph_skip_lines = 0; 913 } 914 } 915 } 916 } 917 918 /* --------------------------------------------------------------------------------------------- */ 919 /** 920 * Move up. 921 * 922 * Unwrap mode: Piece of cake. Wrap mode: If we'd walk back more than the current line offset 923 * within the paragraph, we need to jump back to the previous paragraph and compute its height to 924 * see if we start from that paragraph, and repeat this if necessary. Once we're within the desired 925 * paragraph, we still need to format it from its beginning to know the state. 926 * 927 * See the top of this file for comments about MAX_BACKWARDS_WALK_IN_PARAGRAPH. 928 * 929 * force_max is a nice protection against the rare extreme case that the file underneath us 930 * changes, we don't want to endlessly consume a file of maybe full of zeros upon moving upwards. 931 */ 932 void 933 mcview_ascii_move_up (WView *view, off_t lines) /*
*/ 934 { 935 if (!view->mode_flags.wrap) 936 { 937 while (lines-- != 0) 938 view->dpy_start = mcview_bol (view, view->dpy_start - 1, 0); 939 view->dpy_paragraph_skip_lines = 0; 940 view->dpy_wrap_dirty = TRUE; 941 } 942 else 943 { 944 int i; 945 946 while (lines > view->dpy_paragraph_skip_lines) 947 { 948 // We need to go back to the previous paragraph. 949 if (view->dpy_start == 0) 950 { 951 // Oops, we're already in the first paragraph. 952 view->dpy_paragraph_skip_lines = 0; 953 mcview_state_machine_init (&view->dpy_state_top, 0); 954 return; 955 } 956 lines -= view->dpy_paragraph_skip_lines; 957 view->force_max = view->dpy_start; 958 view->dpy_start = mcview_bol (view, view->dpy_start - 1, 959 view->dpy_start - MAX_BACKWARDS_WALK_IN_PARAGRAPH); 960 mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); 961 /* This is a tricky way of denoting that we're at the end of the paragraph. 962 * Normally we'd jump to the next paragraph and reset paragraph_skip_lines. But for 963 * walking backwards this is exactly what we need. */ 964 view->dpy_paragraph_skip_lines = 965 mcview_display_paragraph (view, &view->dpy_state_top, view->data_area.lines); 966 view->force_max = -1; 967 } 968 969 /* Okay, we have have dpy_start pointing to the desired paragraph, and we still need to 970 * walk back "lines" lines from the current "dpy_paragraph_skip_lines" offset. We can't do 971 * that, so walk from the beginning of the paragraph. */ 972 mcview_state_machine_init (&view->dpy_state_top, view->dpy_start); 973 view->dpy_paragraph_skip_lines -= lines; 974 for (i = 0; i < view->dpy_paragraph_skip_lines; i++) 975 mcview_display_line (view, &view->dpy_state_top, -1, NULL, NULL); 976 } 977 } 978 979 /* --------------------------------------------------------------------------------------------- */ 980 981 void 982 mcview_ascii_moveto_bol (WView *view) /*
*/ 983 { 984 if (!view->mode_flags.wrap) 985 view->dpy_text_column = 0; 986 } 987 988 /* --------------------------------------------------------------------------------------------- */ 989 990 void 991 mcview_ascii_moveto_eol (WView *view) /*
*/ 992 { 993 if (!view->mode_flags.wrap) 994 { 995 mcview_state_machine_t state; 996 off_t linewidth; 997 998 // Get the width of the topmost paragraph. 999 mcview_state_machine_init (&state, view->dpy_start); 1000 mcview_display_line (view, &state, -1, NULL, &linewidth); 1001 view->dpy_text_column = DOZ (linewidth, (off_t) view->data_area.cols); 1002 } 1003 } 1004 1005 /* --------------------------------------------------------------------------------------------- */ 1006 1007 void 1008 mcview_state_machine_init (mcview_state_machine_t *state, off_t offset) /*
*/ 1009 { 1010 memset (state, 0, sizeof (*state)); 1011 state->offset = offset; 1012 state->print_lonely_combining = TRUE; 1013 } 1014 1015 /* --------------------------------------------------------------------------------------------- */