root/src/vfs/smbfs/helpers/lib/kanji.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes the following definitions.
  1. sj_strtok
  2. sj_strstr
  3. sj_strchr
  4. sj_strrchr
  5. skip_kanji_multibyte_char
  6. is_kanji_multibyte_char_1
  7. hangul_is_multibyte_char_1
  8. big5_is_multibyte_char_1
  9. simpch_is_multibyte_char_1
  10. generic_multibyte_strtok
  11. generic_multibyte_strstr
  12. generic_multibyte_strchr
  13. generic_multibyte_strrchr
  14. skip_generic_multibyte_char
  15. euc2sjis
  16. sjis2euc
  17. sj_to_euc
  18. euc_to_sj
  19. sjis2jis
  20. jis2sjis
  21. jis8_to_sj
  22. sj_to_jis8
  23. jis7_to_sj
  24. sj_to_jis7
  25. junet_to_sj
  26. sj_to_junet
  27. hex_to_sj
  28. sj_to_hex
  29. cap_to_sj
  30. sj_to_cap
  31. sj_to_sj
  32. setup_string_function
  33. interpret_coding_system
  34. skip_non_multibyte_char
  35. not_multibyte_char_1
  36. is_multibyte_codepage
  37. initialize_multibyte_vectors

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Kanji Extensions
   5 
   6    Copyright (C) Andrew Tridgell 1992-1998
   7 
   8    Copyright (C) 2011-2019
   9    Free Software Foundation, Inc.
  10 
  11    Adding for Japanese language by <fujita@ainix.isac.co.jp> 1994.9.5
  12    and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11
  13    and add all jis codes sequence type at 1995.8.16
  14    Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
  15 
  16    This file is part of the Midnight Commander.
  17 
  18    The Midnight Commander is free software: you can redistribute it
  19    and/or modify it under the terms of the GNU General Public License as
  20    published by the Free Software Foundation, either version 3 of the License,
  21    or (at your option) any later version.
  22 
  23    The Midnight Commander is distributed in the hope that it will be useful,
  24    but WITHOUT ANY WARRANTY; without even the implied warranty of
  25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  26    GNU General Public License for more details.
  27 
  28    You should have received a copy of the GNU General Public License
  29    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  30  */
  31 
  32 #define _KANJI_C_
  33 #include "includes.h"
  34 
  35 /*
  36  * Function pointers that get overridden when multi-byte code pages
  37  * are loaded.
  38  */
  39 
  40 const char *(*multibyte_strchr) (const char *, int) = (const char *(*)(const char *, int)) strchr;
  41 const char *(*multibyte_strrchr) (const char *, int) = (const char *(*)(const char *, int)) strrchr;
  42 const char *(*multibyte_strstr) (const char *, const char *) =
  43     (const char *(*)(const char *, const char *)) strstr;
  44 char *(*multibyte_strtok) (char *, const char *) = (char *(*)(char *, const char *)) strtok;
  45 
  46 /*
  47  * Kanji is treated differently here due to historical accident of
  48  * it being the first non-English codepage added to Samba.
  49  * The define 'KANJI' is being overloaded to mean 'use kanji codepage
  50  * by default' and also 'this is the filename-to-disk conversion 
  51  * method to use'. This really should be removed and all control
  52  * over this left in the smb.conf parameters 'client codepage'
  53  * and 'coding system'.
  54  */
  55 
  56 #ifndef KANJI
  57 
  58 /*
  59  * Set the default conversion to be the functions in
  60  * charcnv.c.
  61  */
  62 
  63 static size_t skip_non_multibyte_char (char);
  64 static BOOL not_multibyte_char_1 (char);
  65 
  66 char *(*_dos_to_unix) (char *, BOOL) = dos2unix_format;
  67 char *(*_unix_to_dos) (char *, BOOL) = unix2dos_format;
  68 size_t (*_skip_multibyte_char) (char) = skip_non_multibyte_char;
  69 BOOL (*is_multibyte_char_1) (char) = not_multibyte_char_1;
  70 
  71 #else /* KANJI */
  72 
  73 /*
  74  * Set the default conversion to be the function
  75  * sj_to_sj in this file.
  76  */
  77 
  78 static char *sj_to_sj (char *from, BOOL overwrite);
  79 static size_t skip_kanji_multibyte_char (char);
  80 static BOOL is_kanji_multibyte_char_1 (char);
  81 
  82 char *(*_dos_to_unix) (char *, BOOL) = sj_to_sj;
  83 char *(*_unix_to_dos) (char *, BOOL) = sj_to_sj;
  84 size_t (*_skip_multibyte_char) (char) = skip_kanji_multibyte_char;
  85 int (*is_multibyte_char_1) (char) = is_kanji_multibyte_char_1;
  86 
  87 #endif /* KANJI */
  88 
  89 /* jis si/so sequence */
  90 static char jis_kso = JIS_KSO;
  91 static char jis_ksi = JIS_KSI;
  92 static char hex_tag = HEXTAG;
  93 
  94 /*******************************************************************
  95   SHIFT JIS functions
  96 ********************************************************************/
  97 
  98 /*******************************************************************
  99  search token from S1 separated any char of S2
 100  S1 contains SHIFT JIS chars.
 101 ********************************************************************/
 102 
 103 static char *
 104 sj_strtok (char *s1, const char *s2)
     /* [previous][next][first][last][top][bottom][index][help]  */
 105 {
 106     static char *s = NULL;
 107     char *q;
 108     if (!s1)
 109     {
 110         if (!s)
 111         {
 112             return NULL;
 113         }
 114         s1 = s;
 115     }
 116     for (q = s1; *s1;)
 117     {
 118         if (is_shift_jis (*s1))
 119         {
 120             s1 += 2;
 121         }
 122         else if (is_kana (*s1))
 123         {
 124             s1++;
 125         }
 126         else
 127         {
 128             char *p = strchr (s2, *s1);
 129             if (p)
 130             {
 131                 if (s1 != q)
 132                 {
 133                     s = s1 + 1;
 134                     *s1 = '\0';
 135                     return q;
 136                 }
 137                 q = s1 + 1;
 138             }
 139             s1++;
 140         }
 141     }
 142     s = NULL;
 143     if (*q)
 144     {
 145         return q;
 146     }
 147     return NULL;
 148 }
 149 
 150 /*******************************************************************
 151  search string S2 from S1
 152  S1 contains SHIFT JIS chars.
 153 ********************************************************************/
 154 
 155 static const char *
 156 sj_strstr (const char *s1, const char *s2)
     /* [previous][next][first][last][top][bottom][index][help]  */
 157 {
 158     size_t len = strlen (s2);
 159     if (!*s2)
 160         return (const char *) s1;
 161     for (; *s1;)
 162     {
 163         if (*s1 == *s2)
 164         {
 165             if (strncmp (s1, s2, len) == 0)
 166                 return (const char *) s1;
 167         }
 168         if (is_shift_jis (*s1))
 169         {
 170             s1 += 2;
 171         }
 172         else
 173         {
 174             s1++;
 175         }
 176     }
 177     return NULL;
 178 }
 179 
 180 /*******************************************************************
 181  Search char C from beginning of S.
 182  S contains SHIFT JIS chars.
 183 ********************************************************************/
 184 
 185 static const char *
 186 sj_strchr (const char *s, int c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 187 {
 188     for (; *s;)
 189     {
 190         if (*s == c)
 191             return (const char *) s;
 192         if (is_shift_jis (*s))
 193         {
 194             s += 2;
 195         }
 196         else
 197         {
 198             s++;
 199         }
 200     }
 201     return NULL;
 202 }
 203 
 204 /*******************************************************************
 205  Search char C end of S.
 206  S contains SHIFT JIS chars.
 207 ********************************************************************/
 208 
 209 static const char *
 210 sj_strrchr (const char *s, int c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 211 {
 212     const char *q;
 213 
 214     for (q = 0; *s;)
 215     {
 216         if (*s == c)
 217         {
 218             q = (const char *) s;
 219         }
 220         if (is_shift_jis (*s))
 221         {
 222             s += 2;
 223         }
 224         else
 225         {
 226             s++;
 227         }
 228     }
 229     return q;
 230 }
 231 
 232 /*******************************************************************
 233  Kanji multibyte char skip function.
 234 *******************************************************************/
 235 
 236 static size_t
 237 skip_kanji_multibyte_char (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 238 {
 239     if (is_shift_jis (c))
 240     {
 241         return 2;
 242     }
 243     else if (is_kana (c))
 244     {
 245         return 1;
 246     }
 247     return 0;
 248 }
 249 
 250 /*******************************************************************
 251  Kanji multibyte char identification.
 252 *******************************************************************/
 253 
 254 static BOOL
 255 is_kanji_multibyte_char_1 (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 256 {
 257     return is_shift_jis (c);
 258 }
 259 
 260 /*******************************************************************
 261  The following functions are the only ones needed to do multibyte
 262  support for Hangul, Big5 and Simplified Chinese. Most of the
 263  real work for these codepages is done in the generic multibyte
 264  functions. The only reason these functions are needed at all
 265  is that the is_xxx(c) calls are really preprocessor macros.
 266 ********************************************************************/
 267 
 268 /*******************************************************************
 269   Hangul (Korean - code page 949) function.
 270 ********************************************************************/
 271 
 272 static BOOL
 273 hangul_is_multibyte_char_1 (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 274 {
 275     return is_hangul (c);
 276 }
 277 
 278 /*******************************************************************
 279   Big5 Traditional Chinese (code page 950) function.
 280 ********************************************************************/
 281 
 282 static BOOL
 283 big5_is_multibyte_char_1 (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 284 {
 285     return is_big5_c1 (c);
 286 }
 287 
 288 /*******************************************************************
 289   Simplified Chinese (code page 936) function.
 290 ********************************************************************/
 291 
 292 static BOOL
 293 simpch_is_multibyte_char_1 (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 294 {
 295     return is_simpch_c1 (c);
 296 }
 297 
 298 /*******************************************************************
 299   Generic multibyte functions - used by Hangul, Big5 and Simplified
 300   Chinese codepages.
 301 ********************************************************************/
 302 
 303 /*******************************************************************
 304  search token from S1 separated any char of S2
 305  S1 contains generic multibyte chars.
 306 ********************************************************************/
 307 
 308 static char *
 309 generic_multibyte_strtok (char *s1, const char *s2)
     /* [previous][next][first][last][top][bottom][index][help]  */
 310 {
 311     static char *s = NULL;
 312     char *q;
 313     if (!s1)
 314     {
 315         if (!s)
 316         {
 317             return NULL;
 318         }
 319         s1 = s;
 320     }
 321     for (q = s1; *s1;)
 322     {
 323         if ((*is_multibyte_char_1) (*s1))
 324         {
 325             s1 += 2;
 326         }
 327         else
 328         {
 329             char *p = strchr (s2, *s1);
 330             if (p)
 331             {
 332                 if (s1 != q)
 333                 {
 334                     s = s1 + 1;
 335                     *s1 = '\0';
 336                     return q;
 337                 }
 338                 q = s1 + 1;
 339             }
 340             s1++;
 341         }
 342     }
 343     s = NULL;
 344     if (*q)
 345     {
 346         return q;
 347     }
 348     return NULL;
 349 }
 350 
 351 /*******************************************************************
 352  search string S2 from S1
 353  S1 contains generic multibyte chars.
 354 ********************************************************************/
 355 
 356 static const char *
 357 generic_multibyte_strstr (const char *s1, const char *s2)
     /* [previous][next][first][last][top][bottom][index][help]  */
 358 {
 359     size_t len = strlen (s2);
 360     if (!*s2)
 361         return (const char *) s1;
 362     for (; *s1;)
 363     {
 364         if (*s1 == *s2)
 365         {
 366             if (strncmp (s1, s2, len) == 0)
 367                 return (const char *) s1;
 368         }
 369         if ((*is_multibyte_char_1) (*s1))
 370         {
 371             s1 += 2;
 372         }
 373         else
 374         {
 375             s1++;
 376         }
 377     }
 378     return NULL;
 379 }
 380 
 381 /*******************************************************************
 382  Search char C from beginning of S.
 383  S contains generic multibyte chars.
 384 ********************************************************************/
 385 
 386 static const char *
 387 generic_multibyte_strchr (const char *s, int c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 388 {
 389     for (; *s;)
 390     {
 391         if (*s == c)
 392             return (const char *) s;
 393         if ((*is_multibyte_char_1) (*s))
 394         {
 395             s += 2;
 396         }
 397         else
 398         {
 399             s++;
 400         }
 401     }
 402     return NULL;
 403 }
 404 
 405 /*******************************************************************
 406  Search char C end of S.
 407  S contains generic multibyte chars.
 408 ********************************************************************/
 409 
 410 static const char *
 411 generic_multibyte_strrchr (const char *s, int c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 412 {
 413     const char *q;
 414 
 415     for (q = 0; *s;)
 416     {
 417         if (*s == c)
 418         {
 419             q = (const char *) s;
 420         }
 421         if ((*is_multibyte_char_1) (*s))
 422         {
 423             s += 2;
 424         }
 425         else
 426         {
 427             s++;
 428         }
 429     }
 430     return q;
 431 }
 432 
 433 /*******************************************************************
 434  Generic multibyte char skip function.
 435 *******************************************************************/
 436 
 437 static size_t
 438 skip_generic_multibyte_char (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
 439 {
 440     if ((*is_multibyte_char_1) (c))
 441     {
 442         return 2;
 443     }
 444     return 0;
 445 }
 446 
 447 /*******************************************************************
 448   Code conversion
 449 ********************************************************************/
 450 
 451 /* convesion buffer */
 452 static char cvtbuf[1024];
 453 
 454 /*******************************************************************
 455   EUC <-> SJIS
 456 ********************************************************************/
 457 
 458 static int
 459 euc2sjis (int hi, int lo)
     /* [previous][next][first][last][top][bottom][index][help]  */
 460 {
 461     if (hi & 1)
 462         return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61));
 463     else
 464         return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
 465 }
 466 
 467 static int
 468 sjis2euc (int hi, int lo)
     /* [previous][next][first][last][top][bottom][index][help]  */
 469 {
 470     if (lo >= 0x9f)
 471         return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
 472     else
 473         return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) | (lo + (lo >= 0x7f ? 0x60 : 0x61));
 474 }
 475 
 476 /*******************************************************************
 477  Convert FROM contain SHIFT JIS codes to EUC codes
 478  return converted buffer
 479 ********************************************************************/
 480 
 481 static char *
 482 sj_to_euc (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 483 {
 484     char *out;
 485     char *save;
 486 
 487     save = (char *) from;
 488     for (out = cvtbuf; *from;)
 489     {
 490         if (is_shift_jis (*from))
 491         {
 492             int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
 493             *out++ = (code >> 8) & 0xff;
 494             *out++ = code;
 495             from += 2;
 496         }
 497         else if (is_kana (*from))
 498         {
 499             *out++ = (char) euc_kana;
 500             *out++ = *from++;
 501         }
 502         else
 503         {
 504             *out++ = *from++;
 505         }
 506     }
 507     *out = 0;
 508     if (overwrite)
 509     {
 510         pstrcpy ((char *) save, (char *) cvtbuf);
 511         return (char *) save;
 512     }
 513     else
 514     {
 515         return cvtbuf;
 516     }
 517 }
 518 
 519 /*******************************************************************
 520  Convert FROM contain EUC codes to SHIFT JIS codes
 521  return converted buffer
 522 ********************************************************************/
 523 
 524 static char *
 525 euc_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 526 {
 527     char *out;
 528     char *save;
 529 
 530     save = (char *) from;
 531     for (out = cvtbuf; *from;)
 532     {
 533         if (is_euc (*from))
 534         {
 535             int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 536             *out++ = (code >> 8) & 0xff;
 537             *out++ = code;
 538             from += 2;
 539         }
 540         else if (is_euc_kana (*from))
 541         {
 542             *out++ = from[1];
 543             from += 2;
 544         }
 545         else
 546         {
 547             *out++ = *from++;
 548         }
 549     }
 550     *out = 0;
 551     if (overwrite)
 552     {
 553         pstrcpy (save, (char *) cvtbuf);
 554         return save;
 555     }
 556     else
 557     {
 558         return cvtbuf;
 559     }
 560 }
 561 
 562 /*******************************************************************
 563   JIS7,JIS8,JUNET <-> SJIS
 564 ********************************************************************/
 565 
 566 static int
 567 sjis2jis (int hi, int lo)
     /* [previous][next][first][last][top][bottom][index][help]  */
 568 {
 569     if (lo >= 0x9f)
 570         return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
 571     else
 572         return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) | (lo - (lo >= 0x7f ? 0x20 : 0x1f));
 573 }
 574 
 575 static int
 576 jis2sjis (int hi, int lo)
     /* [previous][next][first][last][top][bottom][index][help]  */
 577 {
 578     if (hi & 1)
 579         return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) | (lo + (lo >= 0x60 ? 0x20 : 0x1f));
 580     else
 581         return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
 582 }
 583 
 584 /*******************************************************************
 585  Convert FROM contain JIS codes to SHIFT JIS codes
 586  return converted buffer
 587 ********************************************************************/
 588 
 589 static char *
 590 jis8_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 591 {
 592     char *out;
 593     int shifted;
 594     char *save;
 595 
 596     shifted = _KJ_ROMAN;
 597     save = (char *) from;
 598     for (out = cvtbuf; *from;)
 599     {
 600         if (is_esc (*from))
 601         {
 602             if (is_so1 (from[1]) && is_so2 (from[2]))
 603             {
 604                 shifted = _KJ_KANJI;
 605                 from += 3;
 606             }
 607             else if (is_si1 (from[1]) && is_si2 (from[2]))
 608             {
 609                 shifted = _KJ_ROMAN;
 610                 from += 3;
 611             }
 612             else
 613             {                   /* sequence error */
 614                 goto normal;
 615             }
 616         }
 617         else
 618         {
 619           normal:
 620             switch (shifted)
 621             {
 622             default:
 623             case _KJ_ROMAN:
 624                 *out++ = *from++;
 625                 break;
 626             case _KJ_KANJI:
 627                 {
 628                     int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 629                     *out++ = (code >> 8) & 0xff;
 630                     *out++ = code;
 631                     from += 2;
 632                 }
 633                 break;
 634             }
 635         }
 636     }
 637     *out = 0;
 638     if (overwrite)
 639     {
 640         pstrcpy (save, (char *) cvtbuf);
 641         return save;
 642     }
 643     else
 644     {
 645         return cvtbuf;
 646     }
 647 }
 648 
 649 /*******************************************************************
 650  Convert FROM contain SHIFT JIS codes to JIS codes
 651  return converted buffer
 652 ********************************************************************/
 653 
 654 static char *
 655 sj_to_jis8 (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 656 {
 657     char *out;
 658     int shifted;
 659     char *save;
 660 
 661     shifted = _KJ_ROMAN;
 662     save = (char *) from;
 663     for (out = cvtbuf; *from;)
 664     {
 665         if (is_shift_jis (*from))
 666         {
 667             int code;
 668             switch (shifted)
 669             {
 670             case _KJ_ROMAN:    /* to KANJI */
 671                 *out++ = jis_esc;
 672                 *out++ = jis_so1;
 673                 *out++ = jis_kso;
 674                 shifted = _KJ_KANJI;
 675                 break;
 676             }
 677             code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 678             *out++ = (code >> 8) & 0xff;
 679             *out++ = code;
 680             from += 2;
 681         }
 682         else
 683         {
 684             switch (shifted)
 685             {
 686             case _KJ_KANJI:    /* to ROMAN/KANA */
 687                 *out++ = jis_esc;
 688                 *out++ = jis_si1;
 689                 *out++ = jis_ksi;
 690                 shifted = _KJ_ROMAN;
 691                 break;
 692             }
 693             *out++ = *from++;
 694         }
 695     }
 696     switch (shifted)
 697     {
 698     case _KJ_KANJI:            /* to ROMAN/KANA */
 699         *out++ = jis_esc;
 700         *out++ = jis_si1;
 701         *out++ = jis_ksi;
 702         shifted = _KJ_ROMAN;
 703         break;
 704     }
 705     *out = 0;
 706     if (overwrite)
 707     {
 708         pstrcpy (save, (char *) cvtbuf);
 709         return save;
 710     }
 711     else
 712     {
 713         return cvtbuf;
 714     }
 715 }
 716 
 717 /*******************************************************************
 718  Convert FROM contain 7 bits JIS codes to SHIFT JIS codes
 719  return converted buffer
 720 ********************************************************************/
 721 static char *
 722 jis7_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 723 {
 724     char *out;
 725     int shifted;
 726     char *save;
 727 
 728     shifted = _KJ_ROMAN;
 729     save = (char *) from;
 730     for (out = cvtbuf; *from;)
 731     {
 732         if (is_esc (*from))
 733         {
 734             if (is_so1 (from[1]) && is_so2 (from[2]))
 735             {
 736                 shifted = _KJ_KANJI;
 737                 from += 3;
 738             }
 739             else if (is_si1 (from[1]) && is_si2 (from[2]))
 740             {
 741                 shifted = _KJ_ROMAN;
 742                 from += 3;
 743             }
 744             else
 745             {                   /* sequence error */
 746                 goto normal;
 747             }
 748         }
 749         else if (is_so (*from))
 750         {
 751             shifted = _KJ_KANA; /* to KANA */
 752             from++;
 753         }
 754         else if (is_si (*from))
 755         {
 756             shifted = _KJ_ROMAN;        /* to ROMAN */
 757             from++;
 758         }
 759         else
 760         {
 761           normal:
 762             switch (shifted)
 763             {
 764             default:
 765             case _KJ_ROMAN:
 766                 *out++ = *from++;
 767                 break;
 768             case _KJ_KANJI:
 769                 {
 770                     int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 771                     *out++ = (code >> 8) & 0xff;
 772                     *out++ = code;
 773                     from += 2;
 774                 }
 775                 break;
 776             case _KJ_KANA:
 777                 *out++ = ((int) from[0]) + 0x80;
 778                 break;
 779             }
 780         }
 781     }
 782     *out = 0;
 783     if (overwrite)
 784     {
 785         pstrcpy (save, (char *) cvtbuf);
 786         return save;
 787     }
 788     else
 789     {
 790         return cvtbuf;
 791     }
 792 }
 793 
 794 /*******************************************************************
 795  Convert FROM contain SHIFT JIS codes to 7 bits JIS codes
 796  return converted buffer
 797 ********************************************************************/
 798 static char *
 799 sj_to_jis7 (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 800 {
 801     char *out;
 802     int shifted;
 803     char *save;
 804 
 805     shifted = _KJ_ROMAN;
 806     save = (char *) from;
 807     for (out = cvtbuf; *from;)
 808     {
 809         if (is_shift_jis (*from))
 810         {
 811             int code;
 812             switch (shifted)
 813             {
 814             case _KJ_KANA:
 815                 *out++ = jis_si;        /* to ROMAN and through down */
 816             case _KJ_ROMAN:    /* to KANJI */
 817                 *out++ = jis_esc;
 818                 *out++ = jis_so1;
 819                 *out++ = jis_kso;
 820                 shifted = _KJ_KANJI;
 821                 break;
 822             }
 823             code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 824             *out++ = (code >> 8) & 0xff;
 825             *out++ = code;
 826             from += 2;
 827         }
 828         else if (is_kana (from[0]))
 829         {
 830             switch (shifted)
 831             {
 832             case _KJ_KANJI:    /* to ROMAN */
 833                 *out++ = jis_esc;
 834                 *out++ = jis_si1;
 835                 *out++ = jis_ksi;
 836             case _KJ_ROMAN:    /* to KANA */
 837                 *out++ = jis_so;
 838                 shifted = _KJ_KANA;
 839                 break;
 840             }
 841             *out++ = ((int) *from++) - 0x80;
 842         }
 843         else
 844         {
 845             switch (shifted)
 846             {
 847             case _KJ_KANA:
 848                 *out++ = jis_si;        /* to ROMAN */
 849                 shifted = _KJ_ROMAN;
 850                 break;
 851             case _KJ_KANJI:    /* to ROMAN */
 852                 *out++ = jis_esc;
 853                 *out++ = jis_si1;
 854                 *out++ = jis_ksi;
 855                 shifted = _KJ_ROMAN;
 856                 break;
 857             }
 858             *out++ = *from++;
 859         }
 860     }
 861     switch (shifted)
 862     {
 863     case _KJ_KANA:
 864         *out++ = jis_si;        /* to ROMAN */
 865         break;
 866     case _KJ_KANJI:            /* to ROMAN */
 867         *out++ = jis_esc;
 868         *out++ = jis_si1;
 869         *out++ = jis_ksi;
 870         break;
 871     }
 872     *out = 0;
 873     if (overwrite)
 874     {
 875         pstrcpy (save, (char *) cvtbuf);
 876         return save;
 877     }
 878     else
 879     {
 880         return cvtbuf;
 881     }
 882 }
 883 
 884 /*******************************************************************
 885  Convert FROM contain 7 bits JIS(junet) codes to SHIFT JIS codes
 886  return converted buffer
 887 ********************************************************************/
 888 static char *
 889 junet_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 890 {
 891     char *out;
 892     int shifted;
 893     char *save;
 894 
 895     shifted = _KJ_ROMAN;
 896     save = (char *) from;
 897     for (out = cvtbuf; *from;)
 898     {
 899         if (is_esc (*from))
 900         {
 901             if (is_so1 (from[1]) && is_so2 (from[2]))
 902             {
 903                 shifted = _KJ_KANJI;
 904                 from += 3;
 905             }
 906             else if (is_si1 (from[1]) && is_si2 (from[2]))
 907             {
 908                 shifted = _KJ_ROMAN;
 909                 from += 3;
 910             }
 911             else if (is_juk1 (from[1]) && is_juk2 (from[2]))
 912             {
 913                 shifted = _KJ_KANA;
 914                 from += 3;
 915             }
 916             else
 917             {                   /* sequence error */
 918                 goto normal;
 919             }
 920         }
 921         else
 922         {
 923           normal:
 924             switch (shifted)
 925             {
 926             default:
 927             case _KJ_ROMAN:
 928                 *out++ = *from++;
 929                 break;
 930             case _KJ_KANJI:
 931                 {
 932                     int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 933                     *out++ = (code >> 8) & 0xff;
 934                     *out++ = code;
 935                     from += 2;
 936                 }
 937                 break;
 938             case _KJ_KANA:
 939                 *out++ = ((int) from[0]) + 0x80;
 940                 break;
 941             }
 942         }
 943     }
 944     *out = 0;
 945     if (overwrite)
 946     {
 947         pstrcpy (save, (char *) cvtbuf);
 948         return save;
 949     }
 950     else
 951     {
 952         return cvtbuf;
 953     }
 954 }
 955 
 956 /*******************************************************************
 957  Convert FROM contain SHIFT JIS codes to 7 bits JIS(junet) codes
 958  return converted buffer
 959 ********************************************************************/
 960 static char *
 961 sj_to_junet (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
 962 {
 963     char *out;
 964     int shifted;
 965     char *save;
 966 
 967     shifted = _KJ_ROMAN;
 968     save = (char *) from;
 969     for (out = cvtbuf; *from;)
 970     {
 971         if (is_shift_jis (*from))
 972         {
 973             int code;
 974             switch (shifted)
 975             {
 976             case _KJ_KANA:
 977             case _KJ_ROMAN:    /* to KANJI */
 978                 *out++ = jis_esc;
 979                 *out++ = jis_so1;
 980                 *out++ = jis_so2;
 981                 shifted = _KJ_KANJI;
 982                 break;
 983             }
 984             code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 985             *out++ = (code >> 8) & 0xff;
 986             *out++ = code;
 987             from += 2;
 988         }
 989         else if (is_kana (from[0]))
 990         {
 991             switch (shifted)
 992             {
 993             case _KJ_KANJI:    /* to ROMAN */
 994             case _KJ_ROMAN:    /* to KANA */
 995                 *out++ = jis_esc;
 996                 *out++ = junet_kana1;
 997                 *out++ = junet_kana2;
 998                 shifted = _KJ_KANA;
 999                 break;
1000             }
1001             *out++ = ((int) *from++) - 0x80;
1002         }
1003         else
1004         {
1005             switch (shifted)
1006             {
1007             case _KJ_KANA:
1008             case _KJ_KANJI:    /* to ROMAN */
1009                 *out++ = jis_esc;
1010                 *out++ = jis_si1;
1011                 *out++ = jis_si2;
1012                 shifted = _KJ_ROMAN;
1013                 break;
1014             }
1015             *out++ = *from++;
1016         }
1017     }
1018     switch (shifted)
1019     {
1020     case _KJ_KANA:
1021     case _KJ_KANJI:            /* to ROMAN */
1022         *out++ = jis_esc;
1023         *out++ = jis_si1;
1024         *out++ = jis_si2;
1025         break;
1026     }
1027     *out = 0;
1028     if (overwrite)
1029     {
1030         pstrcpy (save, (char *) cvtbuf);
1031         return save;
1032     }
1033     else
1034     {
1035         return cvtbuf;
1036     }
1037 }
1038 
1039 /*******************************************************************
1040   HEX <-> SJIS
1041 ********************************************************************/
1042 /* ":xx" -> a byte */
1043 static char *
1044 hex_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
1045 {
1046     char *sp, *dp;
1047 
1048     sp = (char *) from;
1049     dp = cvtbuf;
1050     while (*sp)
1051     {
1052         if (*sp == hex_tag && isxdigit ((int) sp[1]) && isxdigit ((int) sp[2]))
1053         {
1054             *dp++ = (hex2bin (sp[1]) << 4) | (hex2bin (sp[2]));
1055             sp += 3;
1056         }
1057         else
1058             *dp++ = *sp++;
1059     }
1060     *dp = '\0';
1061     if (overwrite)
1062     {
1063         pstrcpy ((char *) from, (char *) cvtbuf);
1064         return (char *) from;
1065     }
1066     else
1067     {
1068         return cvtbuf;
1069     }
1070 }
1071 
1072 /*******************************************************************
1073   kanji/kana -> ":xx" 
1074 ********************************************************************/
1075 static char *
1076 sj_to_hex (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
1077 {
1078     unsigned char *sp, *dp;
1079 
1080     sp = (unsigned char *) from;
1081     dp = (unsigned char *) cvtbuf;
1082     while (*sp)
1083     {
1084         if (is_kana (*sp))
1085         {
1086             *dp++ = hex_tag;
1087             *dp++ = bin2hex (((*sp) >> 4) & 0x0f);
1088             *dp++ = bin2hex ((*sp) & 0x0f);
1089             sp++;
1090         }
1091         else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1]))
1092         {
1093             *dp++ = hex_tag;
1094             *dp++ = bin2hex (((*sp) >> 4) & 0x0f);
1095             *dp++ = bin2hex ((*sp) & 0x0f);
1096             sp++;
1097             *dp++ = hex_tag;
1098             *dp++ = bin2hex (((*sp) >> 4) & 0x0f);
1099             *dp++ = bin2hex ((*sp) & 0x0f);
1100             sp++;
1101         }
1102         else
1103             *dp++ = *sp++;
1104     }
1105     *dp = '\0';
1106     if (overwrite)
1107     {
1108         pstrcpy ((char *) from, (char *) cvtbuf);
1109         return (char *) from;
1110     }
1111     else
1112     {
1113         return cvtbuf;
1114     }
1115 }
1116 
1117 /*******************************************************************
1118   CAP <-> SJIS
1119 ********************************************************************/
1120 /* ":xx" CAP -> a byte */
1121 static char *
1122 cap_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
1123 {
1124     char *sp, *dp;
1125 
1126     sp = (char *) from;
1127     dp = cvtbuf;
1128     while (*sp)
1129     {
1130         /*
1131          * The only change between this and hex_to_sj is here. sj_to_cap only
1132          * translates characters greater or equal to 0x80 - make sure that here
1133          * we only do the reverse (that's why the strchr is used rather than
1134          * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
1135          */
1136         if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit ((int) sp[2]))
1137         {
1138             *dp++ = (hex2bin (sp[1]) << 4) | (hex2bin (sp[2]));
1139             sp += 3;
1140         }
1141         else
1142             *dp++ = *sp++;
1143     }
1144     *dp = '\0';
1145     if (overwrite)
1146     {
1147         pstrcpy ((char *) from, (char *) cvtbuf);
1148         return (char *) from;
1149     }
1150     else
1151     {
1152         return cvtbuf;
1153     }
1154 }
1155 
1156 /*******************************************************************
1157   kanji/kana -> ":xx" - CAP format.
1158 ********************************************************************/
1159 static char *
1160 sj_to_cap (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
1161 {
1162     unsigned char *sp, *dp;
1163 
1164     sp = (unsigned char *) from;
1165     dp = (unsigned char *) cvtbuf;
1166     while (*sp)
1167     {
1168         if (*sp >= 0x80)
1169         {
1170             *dp++ = hex_tag;
1171             *dp++ = bin2hex (((*sp) >> 4) & 0x0f);
1172             *dp++ = bin2hex ((*sp) & 0x0f);
1173             sp++;
1174         }
1175         else
1176         {
1177             *dp++ = *sp++;
1178         }
1179     }
1180     *dp = '\0';
1181     if (overwrite)
1182     {
1183         pstrcpy ((char *) from, (char *) cvtbuf);
1184         return (char *) from;
1185     }
1186     else
1187     {
1188         return cvtbuf;
1189     }
1190 }
1191 
1192 /*******************************************************************
1193  sj to sj
1194 ********************************************************************/
1195 static char *
1196 sj_to_sj (char *from, BOOL overwrite)
     /* [previous][next][first][last][top][bottom][index][help]  */
1197 {
1198     if (!overwrite)
1199     {
1200         pstrcpy (cvtbuf, (char *) from);
1201         return cvtbuf;
1202     }
1203     else
1204     {
1205         return (char *) from;
1206     }
1207 }
1208 
1209 /************************************************************************
1210  conversion:
1211  _dos_to_unix           _unix_to_dos
1212 ************************************************************************/
1213 
1214 static void
1215 setup_string_function (int codes)
     /* [previous][next][first][last][top][bottom][index][help]  */
1216 {
1217     switch (codes)
1218     {
1219     default:
1220         _dos_to_unix = dos2unix_format;
1221         _unix_to_dos = unix2dos_format;
1222         break;
1223 
1224     case SJIS_CODE:
1225         _dos_to_unix = sj_to_sj;
1226         _unix_to_dos = sj_to_sj;
1227         break;
1228 
1229     case EUC_CODE:
1230         _dos_to_unix = sj_to_euc;
1231         _unix_to_dos = euc_to_sj;
1232         break;
1233 
1234     case JIS7_CODE:
1235         _dos_to_unix = sj_to_jis7;
1236         _unix_to_dos = jis7_to_sj;
1237         break;
1238 
1239     case JIS8_CODE:
1240         _dos_to_unix = sj_to_jis8;
1241         _unix_to_dos = jis8_to_sj;
1242         break;
1243 
1244     case JUNET_CODE:
1245         _dos_to_unix = sj_to_junet;
1246         _unix_to_dos = junet_to_sj;
1247         break;
1248 
1249     case HEX_CODE:
1250         _dos_to_unix = sj_to_hex;
1251         _unix_to_dos = hex_to_sj;
1252         break;
1253 
1254     case CAP_CODE:
1255         _dos_to_unix = sj_to_cap;
1256         _unix_to_dos = cap_to_sj;
1257         break;
1258     }
1259 }
1260 
1261 /************************************************************************
1262  Interpret coding system.
1263 ************************************************************************/
1264 
1265 void
1266 interpret_coding_system (const char *str)
     /* [previous][next][first][last][top][bottom][index][help]  */
1267 {
1268     int codes = UNKNOWN_CODE;
1269 
1270     if (strequal (str, "sjis"))
1271     {
1272         codes = SJIS_CODE;
1273     }
1274     else if (strequal (str, "euc"))
1275     {
1276         codes = EUC_CODE;
1277     }
1278     else if (strequal (str, "cap"))
1279     {
1280         codes = CAP_CODE;
1281         hex_tag = HEXTAG;
1282     }
1283     else if (strequal (str, "hex"))
1284     {
1285         codes = HEX_CODE;
1286         hex_tag = HEXTAG;
1287     }
1288     else if (!strncasecmp (str, "hex", 3))
1289     {
1290         codes = HEX_CODE;
1291         hex_tag = (str[3] ? str[3] : HEXTAG);
1292     }
1293     else if (strequal (str, "j8bb"))
1294     {
1295         codes = JIS8_CODE;
1296         jis_kso = 'B';
1297         jis_ksi = 'B';
1298     }
1299     else if (strequal (str, "j8bj") || strequal (str, "jis8"))
1300     {
1301         codes = JIS8_CODE;
1302         jis_kso = 'B';
1303         jis_ksi = 'J';
1304     }
1305     else if (strequal (str, "j8bh"))
1306     {
1307         codes = JIS8_CODE;
1308         jis_kso = 'B';
1309         jis_ksi = 'H';
1310     }
1311     else if (strequal (str, "j8@b"))
1312     {
1313         codes = JIS8_CODE;
1314         jis_kso = '@';
1315         jis_ksi = 'B';
1316     }
1317     else if (strequal (str, "j8@j"))
1318     {
1319         codes = JIS8_CODE;
1320         jis_kso = '@';
1321         jis_ksi = 'J';
1322     }
1323     else if (strequal (str, "j8@h"))
1324     {
1325         codes = JIS8_CODE;
1326         jis_kso = '@';
1327         jis_ksi = 'H';
1328     }
1329     else if (strequal (str, "j7bb"))
1330     {
1331         codes = JIS7_CODE;
1332         jis_kso = 'B';
1333         jis_ksi = 'B';
1334     }
1335     else if (strequal (str, "j7bj") || strequal (str, "jis7"))
1336     {
1337         codes = JIS7_CODE;
1338         jis_kso = 'B';
1339         jis_ksi = 'J';
1340     }
1341     else if (strequal (str, "j7bh"))
1342     {
1343         codes = JIS7_CODE;
1344         jis_kso = 'B';
1345         jis_ksi = 'H';
1346     }
1347     else if (strequal (str, "j7@b"))
1348     {
1349         codes = JIS7_CODE;
1350         jis_kso = '@';
1351         jis_ksi = 'B';
1352     }
1353     else if (strequal (str, "j7@j"))
1354     {
1355         codes = JIS7_CODE;
1356         jis_kso = '@';
1357         jis_ksi = 'J';
1358     }
1359     else if (strequal (str, "j7@h"))
1360     {
1361         codes = JIS7_CODE;
1362         jis_kso = '@';
1363         jis_ksi = 'H';
1364     }
1365     else if (strequal (str, "jubb"))
1366     {
1367         codes = JUNET_CODE;
1368         jis_kso = 'B';
1369         jis_ksi = 'B';
1370     }
1371     else if (strequal (str, "jubj") || strequal (str, "junet"))
1372     {
1373         codes = JUNET_CODE;
1374         jis_kso = 'B';
1375         jis_ksi = 'J';
1376     }
1377     else if (strequal (str, "jubh"))
1378     {
1379         codes = JUNET_CODE;
1380         jis_kso = 'B';
1381         jis_ksi = 'H';
1382     }
1383     else if (strequal (str, "ju@b"))
1384     {
1385         codes = JUNET_CODE;
1386         jis_kso = '@';
1387         jis_ksi = 'B';
1388     }
1389     else if (strequal (str, "ju@j"))
1390     {
1391         codes = JUNET_CODE;
1392         jis_kso = '@';
1393         jis_ksi = 'J';
1394     }
1395     else if (strequal (str, "ju@h"))
1396     {
1397         codes = JUNET_CODE;
1398         jis_kso = '@';
1399         jis_ksi = 'H';
1400     }
1401     setup_string_function (codes);
1402 }
1403 
/*******************************************************************
 Non multibyte char function.

 Skip routine for single-byte code pages: the argument is ignored and
 the result is always 0.
*******************************************************************/

static size_t
skip_non_multibyte_char (char c)
{
    const size_t nothing_to_skip = 0;

    (void) c;                   /* deliberately unused */
    return nothing_to_skip;
}
1414 
1415 /*******************************************************************
1416  Function that always says a character isn't multibyte.
1417 *******************************************************************/
1418 
1419 static BOOL
1420 not_multibyte_char_1 (char c)
     /* [previous][next][first][last][top][bottom][index][help]  */
1421 {
1422     (void) c;
1423     return False;
1424 }
1425 
1426 /*******************************************************************
1427  Function to determine if we are in a multibyte code page.
1428 *******************************************************************/
1429 
1430 static BOOL is_multibyte_codepage_val = False;
1431 
1432 BOOL
1433 is_multibyte_codepage (void)
     /* [previous][next][first][last][top][bottom][index][help]  */
1434 {
1435     return is_multibyte_codepage_val;
1436 }
1437 
1438 /*******************************************************************
1439  Setup the function pointers for the functions that are replaced
1440  when multi-byte codepages are used.
1441 
1442  The dos_to_unix and unix_to_dos function pointers are only
1443  replaced by setup_string_function called by interpret_coding_system
1444  above.
1445 *******************************************************************/
1446 
1447 void
1448 initialize_multibyte_vectors (int client_codepage)
     /* [previous][next][first][last][top][bottom][index][help]  */
1449 {
1450     switch (client_codepage)
1451     {
1452     case KANJI_CODEPAGE:
1453         multibyte_strchr = sj_strchr;
1454         multibyte_strrchr = sj_strrchr;
1455         multibyte_strstr = sj_strstr;
1456         multibyte_strtok = sj_strtok;
1457         _skip_multibyte_char = skip_kanji_multibyte_char;
1458         is_multibyte_char_1 = is_kanji_multibyte_char_1;
1459         is_multibyte_codepage_val = True;
1460         break;
1461     case HANGUL_CODEPAGE:
1462         multibyte_strchr = generic_multibyte_strchr;
1463         multibyte_strrchr = generic_multibyte_strrchr;
1464         multibyte_strstr = generic_multibyte_strstr;
1465         multibyte_strtok = generic_multibyte_strtok;
1466         _skip_multibyte_char = skip_generic_multibyte_char;
1467         is_multibyte_char_1 = hangul_is_multibyte_char_1;
1468         is_multibyte_codepage_val = True;
1469         break;
1470     case BIG5_CODEPAGE:
1471         multibyte_strchr = generic_multibyte_strchr;
1472         multibyte_strrchr = generic_multibyte_strrchr;
1473         multibyte_strstr = generic_multibyte_strstr;
1474         multibyte_strtok = generic_multibyte_strtok;
1475         _skip_multibyte_char = skip_generic_multibyte_char;
1476         is_multibyte_char_1 = big5_is_multibyte_char_1;
1477         is_multibyte_codepage_val = True;
1478         break;
1479     case SIMPLIFIED_CHINESE_CODEPAGE:
1480         multibyte_strchr = generic_multibyte_strchr;
1481         multibyte_strrchr = generic_multibyte_strrchr;
1482         multibyte_strstr = generic_multibyte_strstr;
1483         multibyte_strtok = generic_multibyte_strtok;
1484         _skip_multibyte_char = skip_generic_multibyte_char;
1485         is_multibyte_char_1 = simpch_is_multibyte_char_1;
1486         is_multibyte_codepage_val = True;
1487         break;
1488         /*
1489          * Single char size code page.
1490          */
1491     default:
1492         multibyte_strchr = (const char *(*)(const char *, int)) strchr;
1493         multibyte_strrchr = (const char *(*)(const char *, int)) strrchr;
1494         multibyte_strstr = (const char *(*)(const char *, const char *)) strstr;
1495         multibyte_strtok = (char *(*)(char *, const char *)) strtok;
1496         _skip_multibyte_char = skip_non_multibyte_char;
1497         is_multibyte_char_1 = not_multibyte_char_1;
1498         is_multibyte_codepage_val = False;
1499         break;
1500     }
1501 }

/* [previous][next][first][last][top][bottom][index][help]  */