root/ext/gd/libgd/gdkanji.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. debug
  2. error
  3. DetectKanjiCode
  4. SJIStoJIS
  5. han2zen
  6. do_convert
  7. do_check_and_conv
  8. any2eucjp
  9. strwidth
  10. main

   1 
   2 /* gdkanji.c (Kanji code converter)                            */
   3 /*                 written by Masahito Yamaga (ma@yama-ga.com) */
   4 
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <string.h>
   8 #include "gd.h"
   9 #include "gdhelpers.h"
  10 
  11 #include <stdarg.h>
  12 #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
  13 #include <iconv.h>
  14 #ifdef HAVE_ERRNO_H
  15 #include <errno.h>
  16 #endif
  17 #endif
  18 
  19 #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
  20 #define HAVE_ICONV 1
  21 #endif
  22 
  23 #define LIBNAME "any2eucjp()"
  24 
  25 #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
  26 #ifndef SJISPRE
  27 #define SJISPRE 1
  28 #endif
  29 #endif
  30 
  31 #ifdef TRUE
  32 #undef TRUE
  33 #endif
  34 #ifdef FALSE
  35 #undef FALSE
  36 #endif
  37 
  38 #define TRUE  1
  39 #define FALSE 0
  40 
   41 #define NEW 1 /* DetectKanjiCode() result: ESC $ B seen; reported as "New JIS" */
   42 #define OLD 2 /* ESC $ @ seen; reported as "Old JIS" */
   43 #define ESCI 3 /* ESC ( I seen; Hankaku-Kana (jisx0201) escape sequence */
   44 #define NEC 4 /* ESC K seen; reported as "NEC Kanji", which is not converted */
   45 #define EUC 5 /* byte pattern judged to be EUC */
   46 #define SJIS 6 /* byte pattern judged to be SJIS */
   47 #define EUCORSJIS 7 /* still ambiguous between EUC and SJIS */
   48 #define ASCII 8 /* no kanji code found in the string */
   49 
   50 #define NEWJISSTR "JIS7" /* source-encoding name handed to do_convert() for NEW and ESCI */
   51 #define OLDJISSTR "jis" /* source-encoding name handed to do_convert() for OLD */
   52 #define EUCSTR    "eucJP" /* target-encoding name given to iconv_open() */
   53 #define SJISSTR   "SJIS" /* source-encoding name handed to do_convert() for SJIS */
   54 
   55 #define ESC 27 /* escape byte that starts the JIS sequences tested above */
   56 #define SS2 142 /* byte written in front of each Hankaku Kana byte in the converted output */
  57 
  58 static void
  59 debug (const char *format,...)
  60 {
  61 #ifdef DEBUG
  62   va_list args;
  63 
  64   va_start (args, format);
  65   fprintf (stdout, "%s: ", LIBNAME);
  66   vfprintf (stdout, format, args);
  67   fprintf (stdout, "\n");
  68   va_end (args);
  69 #endif
  70 }
  71 
  72 static void
  73 error (const char *format,...)
  74 {
  75         va_list args;
  76         char *tmp;
  77         TSRMLS_FETCH();
  78 
  79         va_start(args, format);
  80         vspprintf(&tmp, 0, format, args);
  81         va_end(args);
  82         php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", LIBNAME, tmp);
  83         efree(tmp);
  84 }
  85 
  86 /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
  87 
  88 static int
  89 DetectKanjiCode (unsigned char *str)
  90 {
  91   static int whatcode = ASCII;
  92   int oldcode = ASCII;
  93   int c, i;
  94   char *lang = NULL;
  95 
  96   c = '\1';
  97   i = 0;
  98 
  99   if (whatcode != EUCORSJIS && whatcode != ASCII)
 100     {
 101       oldcode = whatcode;
 102       whatcode = ASCII;
 103     }
 104 
 105   while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
 106     {
 107       if ((c = str[i++]) != '\0')
 108         {
 109           if (c == ESC)
 110             {
 111               c = str[i++];
 112               if (c == '$')
 113                 {
 114                   c = str[i++];
 115                   if (c == 'B')
 116                     whatcode = NEW;
 117                   else if (c == '@')
 118                     whatcode = OLD;
 119                 }
 120               else if (c == '(')
 121                 {
 122                   c = str[i++];
 123                   if (c == 'I')
 124                     whatcode = ESCI;
 125                 }
 126               else if (c == 'K')
 127                 whatcode = NEC;
 128             }
 129           else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
 130             whatcode = SJIS;
 131           else if (c == SS2)
 132             {
 133               c = str[i++];
 134               if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
 135                 whatcode = SJIS;
 136               else if (c >= 161 && c <= 223)
 137                 whatcode = EUCORSJIS;
 138             }
 139           else if (c >= 161 && c <= 223)
 140             {
 141               c = str[i++];
 142               if (c >= 240 && c <= 254)
 143                 whatcode = EUC;
 144               else if (c >= 161 && c <= 223)
 145                 whatcode = EUCORSJIS;
 146               else if (c >= 224 && c <= 239)
 147                 {
 148                   whatcode = EUCORSJIS;
 149                   while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
 150                     {
 151                       if (c >= 129)
 152                         {
 153                           if (c <= 141 || (c >= 143 && c <= 159))
 154                             whatcode = SJIS;
 155                           else if (c >= 253 && c <= 254)
 156                             whatcode = EUC;
 157                         }
 158                       c = str[i++];
 159                     }
 160                 }
 161               else if (c <= 159)
 162                 whatcode = SJIS;
 163             }
 164           else if (c >= 240 && c <= 254)
 165             whatcode = EUC;
 166           else if (c >= 224 && c <= 239)
 167             {
 168               c = str[i++];
 169               if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
 170                 whatcode = SJIS;
 171               else if (c >= 253 && c <= 254)
 172                 whatcode = EUC;
 173               else if (c >= 161 && c <= 252)
 174                 whatcode = EUCORSJIS;
 175             }
 176         }
 177     }
 178 
 179 #ifdef DEBUG
 180   if (whatcode == ASCII)
 181     debug ("Kanji code not included.");
 182   else if (whatcode == EUCORSJIS)
 183     debug ("Kanji code not detected.");
 184   else
 185     debug ("Kanji code detected at %d byte.", i);
 186 #endif
 187 
 188   if (whatcode == EUCORSJIS && oldcode != ASCII)
 189     whatcode = oldcode;
 190 
 191   if (whatcode == EUCORSJIS)
 192     {
 193       if (getenv ("LC_ALL"))
 194         lang = getenv ("LC_ALL");
 195       else if (getenv ("LC_CTYPE"))
 196         lang = getenv ("LC_CTYPE");
 197       else if (getenv ("LANG"))
 198         lang = getenv ("LANG");
 199 
 200       if (lang)
 201         {
 202           if (strcmp (lang, "ja_JP.SJIS") == 0 ||
 203 #ifdef hpux
 204               strcmp (lang, "japanese") == 0 ||
 205 #endif
 206               strcmp (lang, "ja_JP.mscode") == 0 ||
 207               strcmp (lang, "ja_JP.PCK") == 0)
 208             whatcode = SJIS;
 209           else if (strncmp (lang, "ja", 2) == 0)
 210 #ifdef SJISPRE
 211             whatcode = SJIS;
 212 #else
 213             whatcode = EUC;
 214 #endif
 215         }
 216     }
 217 
 218   if (whatcode == EUCORSJIS)
 219 #ifdef SJISPRE
 220     whatcode = SJIS;
 221 #else
 222     whatcode = EUC;
 223 #endif
 224 
 225   return whatcode;
 226 }
 227 
 228 /* SJIStoJIS() is sjis2jis() by Ken Lunde. */
 229 
 230 static void
 231 SJIStoJIS (int *p1, int *p2)
 232 {
 233   register unsigned char c1 = *p1;
 234   register unsigned char c2 = *p2;
 235   register int adjust = c2 < 159;
 236   register int rowOffset = c1 < 160 ? 112 : 176;
 237   register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
 238 
 239   *p1 = ((c1 - rowOffset) << 1) - adjust;
 240   *p2 -= cellOffset;
 241 }
 242 
 243 /* han2zen() was derived from han2zen() written by Ken Lunde. */
 244 
 245 #define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179))
 246 #define IS_HANDAKU(c) (c >= 202 && c <= 206)
 247 
 248 static void
 249 han2zen (int *p1, int *p2)
 250 {
 251   int c = *p1;
 252   int daku = FALSE;
 253   int handaku = FALSE;
 254   int mtable[][2] =
 255   {
 256     {129, 66},
 257     {129, 117},
 258     {129, 118},
 259     {129, 65},
 260     {129, 69},
 261     {131, 146},
 262     {131, 64},
 263     {131, 66},
 264     {131, 68},
 265     {131, 70},
 266     {131, 72},
 267     {131, 131},
 268     {131, 133},
 269     {131, 135},
 270     {131, 98},
 271     {129, 91},
 272     {131, 65},
 273     {131, 67},
 274     {131, 69},
 275     {131, 71},
 276     {131, 73},
 277     {131, 74},
 278     {131, 76},
 279     {131, 78},
 280     {131, 80},
 281     {131, 82},
 282     {131, 84},
 283     {131, 86},
 284     {131, 88},
 285     {131, 90},
 286     {131, 92},
 287     {131, 94},
 288     {131, 96},
 289     {131, 99},
 290     {131, 101},
 291     {131, 103},
 292     {131, 105},
 293     {131, 106},
 294     {131, 107},
 295     {131, 108},
 296     {131, 109},
 297     {131, 110},
 298     {131, 113},
 299     {131, 116},
 300     {131, 119},
 301     {131, 122},
 302     {131, 125},
 303     {131, 126},
 304     {131, 128},
 305     {131, 129},
 306     {131, 130},
 307     {131, 132},
 308     {131, 134},
 309     {131, 136},
 310     {131, 137},
 311     {131, 138},
 312     {131, 139},
 313     {131, 140},
 314     {131, 141},
 315     {131, 143},
 316     {131, 147},
 317     {129, 74},
 318     {129, 75}
 319   };
 320 
 321   if (*p2 == 222 && IS_DAKU (*p1))
 322     daku = TRUE;                /* Daku-ten */
 323   else if (*p2 == 223 && IS_HANDAKU (*p1))
 324     handaku = TRUE;             /* Han-daku-ten */
 325 
 326   *p1 = mtable[c - 161][0];
 327   *p2 = mtable[c - 161][1];
 328 
 329   if (daku)
 330     {
 331       if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
 332         (*p2)++;
 333       else if (*p2 == 131 && *p2 == 69)
 334         *p2 = 148;
 335     }
 336   else if (handaku && *p2 >= 110 && *p2 <= 122)
 337     (*p2) += 2;
 338 }
 339 
 340 /* Recast strcpy to handle unsigned chars used below. */
 341 #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
 342 
 343 static void
 344 do_convert (unsigned char *to, unsigned char *from, const char *code)
 345 {
 346 #ifdef HAVE_ICONV
 347   iconv_t cd;
 348   size_t from_len, to_len;
 349 
 350   if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
 351     {
 352       error ("iconv_open() error");
 353 #ifdef HAVE_ERRNO_H
 354       if (errno == EINVAL)
 355         error ("invalid code specification: \"%s\" or \"%s\"",
 356                EUCSTR, code);
 357 #endif
 358       strcpy ((char *) to, (const char *) from);
 359       return;
 360     }
 361 
 362   from_len = strlen ((const char *) from) + 1;
 363   to_len = BUFSIZ;
 364 
 365   if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
 366     {
 367 #ifdef HAVE_ERRNO_H
 368       if (errno == EINVAL)
 369         error ("invalid end of input string");
 370       else if (errno == EILSEQ)
 371         error ("invalid code in input string");
 372       else if (errno == E2BIG)
 373         error ("output buffer overflow at do_convert()");
 374       else
 375 #endif
 376         error ("something happen");
 377       strcpy ((char *) to, (const char *) from);
 378       return;
 379     }
 380 
 381   if (iconv_close (cd) != 0)
 382     {
 383       error ("iconv_close() error");
 384     }
 385 #else
 386   int p1, p2, i, j;
 387   int jisx0208 = FALSE;
 388   int hankaku = FALSE;
 389 
 390   j = 0;
 391   if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
 392     {
 393       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
 394         {
 395           if (from[i] == ESC)
 396             {
 397               i++;
 398               if (from[i] == '$')
 399                 {
 400                   jisx0208 = TRUE;
 401                   hankaku = FALSE;
 402                   i++;
 403                 }
 404               else if (from[i] == '(')
 405                 {
 406                   jisx0208 = FALSE;
 407                   i++;
 408                   if (from[i] == 'I')   /* Hankaku Kana */
 409                     hankaku = TRUE;
 410                   else
 411                     hankaku = FALSE;
 412                 }
 413             }
 414           else
 415             {
 416               if (jisx0208)
 417                 to[j++] = from[i] + 128;
 418               else if (hankaku)
 419                 {
 420                   to[j++] = SS2;
 421                   to[j++] = from[i] + 128;
 422                 }
 423               else
 424                 to[j++] = from[i];
 425             }
 426         }
 427     }
 428   else if (strcmp (code, SJISSTR) == 0)
 429     {
 430       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
 431         {
 432           p1 = from[i];
 433           if (p1 < 127)
 434             to[j++] = p1;
 435           else if ((p1 >= 161) && (p1 <= 223))
 436             {                   /* Hankaku Kana */
 437               to[j++] = SS2;
 438               to[j++] = p1;
 439             }
 440           else
 441             {
 442               p2 = from[++i];
 443               SJIStoJIS (&p1, &p2);
 444               to[j++] = p1 + 128;
 445               to[j++] = p2 + 128;
 446             }
 447         }
 448     }
 449   else
 450     {
 451       error ("invalid code specification: \"%s\"", code);
 452       return;
 453     }
 454 
 455   if (j >= BUFSIZ)
 456     {
 457       error ("output buffer overflow at do_convert()");
 458       ustrcpy (to, from);
 459     }
 460   else
 461     to[j] = '\0';
 462 #endif /* HAVE_ICONV */
 463 }
 464 
 465 static int
 466 do_check_and_conv (unsigned char *to, unsigned char *from)
 467 {
 468   static unsigned char tmp[BUFSIZ];
 469   int p1, p2, i, j;
 470   int kanji = TRUE;
 471 
 472   switch (DetectKanjiCode (from))
 473     {
 474     case NEW:
 475       debug ("Kanji code is New JIS.");
 476       do_convert (tmp, from, NEWJISSTR);
 477       break;
 478     case OLD:
 479       debug ("Kanji code is Old JIS.");
 480       do_convert (tmp, from, OLDJISSTR);
 481       break;
 482     case ESCI:
 483       debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
 484       do_convert (tmp, from, NEWJISSTR);
 485       break;
 486     case NEC:
 487       debug ("Kanji code is NEC Kanji.");
 488       error ("cannot convert NEC Kanji.");
 489       ustrcpy (tmp, from);
 490       kanji = FALSE;
 491       break;
 492     case EUC:
 493       debug ("Kanji code is EUC.");
 494       ustrcpy (tmp, from);
 495       break;
 496     case SJIS:
 497       debug ("Kanji code is SJIS.");
 498       do_convert (tmp, from, SJISSTR);
 499       break;
 500     case EUCORSJIS:
 501       debug ("Kanji code is EUC or SJIS.");
 502       ustrcpy (tmp, from);
 503       kanji = FALSE;
 504       break;
 505     case ASCII:
 506       debug ("This is ASCII string.");
 507       ustrcpy (tmp, from);
 508       kanji = FALSE;
 509       break;
 510     default:
 511       debug ("This string includes unknown code.");
 512       ustrcpy (tmp, from);
 513       kanji = FALSE;
 514       break;
 515     }
 516 
 517   /* Hankaku Kana ---> Zenkaku Kana */
 518   if (kanji)
 519     {
 520       j = 0;
 521       for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
 522         {
 523           if (tmp[i] == SS2)
 524             {
 525               p1 = tmp[++i];
 526               if (tmp[i + 1] == SS2)
 527                 {
 528                   p2 = tmp[i + 2];
 529                   if (p2 == 222 || p2 == 223)
 530                     i += 2;
 531                   else
 532                     p2 = 0;
 533                 }
 534               else
 535                 p2 = 0;
 536               han2zen (&p1, &p2);
 537               SJIStoJIS (&p1, &p2);
 538               to[j++] = p1 + 128;
 539               to[j++] = p2 + 128;
 540             }
 541           else
 542             to[j++] = tmp[i];
 543         }
 544 
 545       if (j >= BUFSIZ)
 546         {
 547           error ("output buffer overflow at Hankaku --> Zenkaku");
 548           ustrcpy (to, tmp);
 549         }
 550       else
 551         to[j] = '\0';
 552     }
 553   else
 554     ustrcpy (to, tmp);
 555 
 556   return kanji;
 557 }
 558 
 559 int
 560 any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
 561 {
 562   static unsigned char tmp_dest[BUFSIZ];
 563   int ret;
 564 
 565   if (strlen ((const char *) src) >= BUFSIZ)
 566     {
 567       error ("input string too large");
 568       return -1;
 569     }
 570   if (dest_max > BUFSIZ)
 571     {
 572       error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
 573       return -1;
 574     }
 575   ret = do_check_and_conv (tmp_dest, src);
 576   if (strlen ((const char *) tmp_dest) >= dest_max)
 577     {
 578       error ("output buffer overflow");
 579       ustrcpy (dest, src);
 580       return -1;
 581     }
 582   ustrcpy (dest, tmp_dest);
 583   return ret;
 584 }
 585 
 586 #if 0
 587 unsigned int
 588 strwidth (unsigned char *s)
 589 {
 590   unsigned char *t;
 591   unsigned int i;
 592 
 593   t = (unsigned char *) gdMalloc (BUFSIZ);
 594   any2eucjp (t, s, BUFSIZ);
 595   i = strlen (t);
 596   gdFree (t);
 597   return i;
 598 }
 599 
 600 #ifdef DEBUG
 601 int
 602 main ()
 603 {
 604   unsigned char input[BUFSIZ];
 605   unsigned char *output;
 606   unsigned char *str;
 607   int c, i = 0;
 608 
 609   while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
 610     input[i++] = c;
 611   input[i] = '\0';
 612 
 613   printf ("input : %d bytes\n", strlen ((const char *) input));
 614   printf ("output: %d bytes\n", strwidth (input));
 615 
 616   output = (unsigned char *) gdMalloc (BUFSIZ);
 617   any2eucjp (output, input, BUFSIZ);
 618   str = output;
 619   while (*str != '\0')
 620     putchar (*(str++));
 621   putchar ('\n');
 622   gdFree (output);
 623 
 624   return 0;
 625 }
 626 #endif
 627 #endif

/* [<][>][^][v][top][bottom][index][help] */