/* gdkanji.c (Kanji code converter) */ /* written by Masahito Yamaga (yamaga@ipc.chiba-u.ac.jp) */ #include #include #include #include "gd.h" #include "gdhelpers.h" #ifdef HAVE_STDARG_H #include #endif #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV) #include #ifdef HAVE_ERRNO_H #include #endif #endif #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV) #define HAVE_ICONV 1 #endif #define LIBNAME "any2eucjp()" #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS) #ifndef SJISPRE #define SJISPRE 1 #endif #endif #ifdef TRUE #undef TRUE #endif #ifdef FALSE #undef FALSE #endif #define TRUE 1 #define FALSE 0 #define NEW 1 #define OLD 2 #define ESCI 3 #define NEC 4 #define EUC 5 #define SJIS 6 #define EUCORSJIS 7 #define ASCII 8 #define NEWJISSTR "JIS7" #define OLDJISSTR "jis" #define EUCSTR "eucJP" #define SJISSTR "SJIS" #define ESC 27 #define SS2 142 #ifdef __STDC__ static void debug(const char *format, ...) #else static debug(format, ...) char *format; #endif { #ifdef DEBUG #ifdef HAVE_STDARG_H va_list args; va_start(args, format); fprintf(stdout, "%s: ", LIBNAME); vfprintf(stdout, format, args); fprintf(stdout, "\n"); va_end(args); #endif #endif } #ifdef __STDC__ static void error(const char *format, ...) #else static error(format, ...) char *format; #endif { #ifdef HAVE_STDARG_H va_list args; va_start(args, format); fprintf(stderr, "%s: ", LIBNAME); vfprintf(stderr, format, args); fprintf(stderr, "\n"); va_end(args); #endif } /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */ #ifdef __STDC__ static int DetectKanjiCode(unsigned char *str) #else static int DetectKanjiCode(str) unsigned char *str; #endif { static int whatcode; int c, i; char *lang = NULL; c = '\1'; i = 0; if (whatcode == 0) whatcode = ASCII; while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0') { if ((c = str[i++]) != '\0') { if (c == ESC){ c = str[i++]; if (c == '$') { c = str[i++]; if (c == 'B') whatcode = NEW; else if (c == '@') whatcode = OLD; } else if (c == '(') { c = str[i++]; if (c == 'I') whatcode = ESCI; } else if (c == 'K') whatcode = NEC; } else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159)) whatcode = SJIS; else if (c == SS2) { c = str[i++]; if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252)) whatcode = SJIS; else if (c >= 161 && c <= 223) whatcode = EUCORSJIS; } else if (c >= 161 && c <= 223) { c = str[i++]; if (c >= 240 && c <= 254) whatcode = EUC; else if (c >= 161 && c <= 223) whatcode = EUCORSJIS; else if (c >= 224 && c <= 239) { whatcode = EUCORSJIS; while (c >= 64 && c != '\0' && whatcode == EUCORSJIS) { if (c >= 129) { if (c <= 141 || (c >= 143 && c <= 159)) whatcode = SJIS; else if (c >= 253 && c <= 254) whatcode = EUC; } c = str[i++]; } } else if (c <= 159) whatcode = SJIS; } else if (c >= 240 && c <= 254) whatcode = EUC; else if (c >= 224 && c <= 239) { c = str[i++]; if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160)) whatcode = SJIS; else if (c >= 253 && c >= 254) whatcode = EUC; else if (c >= 161 && c <= 252) whatcode = EUCORSJIS; } } } #ifdef DEBUG if (whatcode == ASCII) debug("Kanji code not included."); else if (whatcode == EUCORSJIS) debug("Kanji code not detected."); else debug("Kanji code detected at %d byte.", i); #endif if (whatcode == EUCORSJIS) { if (getenv ("LC_ALL")) lang = getenv ("LC_ALL"); else if (getenv ("LC_CTYPE")) lang = getenv ("LC_CTYPE"); else if (getenv ("LANG")) lang = getenv ("LANG"); if (lang) { if (strcmp (lang, "ja_JP.SJIS") == 0 || #ifdef hpux strcmp (lang, "japanese") == 0 || #endif strcmp (lang, "ja_JP.mscode") == 0 || strcmp (lang, "ja_JP.PCK") == 0) whatcode = SJIS; else if (strncmp (lang, "ja", 2) == 0) #ifdef SJISPRE whatcode = SJIS; #else whatcode = EUC; #endif } } if (whatcode == EUCORSJIS) #ifdef SJISPRE whatcode = SJIS; #else whatcode = EUC; #endif return whatcode; } /* SJIStoJIS() is sjis2jis() by Ken Lunde. */ #ifdef __STDC__ static void SJIStoJIS(int *p1, int *p2) #else static SJIStoJIS(p1, p2) int *p1, *p2; #endif { register unsigned char c1 = *p1; register unsigned char c2 = *p2; register int adjust = c2 < 159; register int rowOffset = c1 < 160 ? 112 : 176; register int cellOffset = adjust ? (31 + (c2 > 127)) : 126; *p1 = ((c1 - rowOffset) << 1) - adjust; *p2 -= cellOffset; } /* han2zen() was derived from han2zen() written by Ken Lunde. */ #define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179)) #define IS_HANDAKU(c) (c >= 202 && c <= 206) #ifdef __STDC__ static void han2zen(int *p1, int *p2) #else static han2zen(p1, p2) int *p1, *p2; #endif { int c = *p1; int daku = FALSE; int handaku = FALSE; int mtable[][2] = { {129,66},{129,117},{129,118},{129,65},{129,69}, {131,146}, {131,64},{131,66},{131,68},{131,70},{131,72}, {131,131},{131,133},{131,135}, {131,98},{129,91}, {131,65},{131,67},{131,69},{131,71},{131,73}, {131,74},{131,76},{131,78},{131,80},{131,82}, {131,84},{131,86},{131,88},{131,90},{131,92}, {131,94},{131,96},{131,99},{131,101},{131,103}, {131,105},{131,106},{131,107},{131,108},{131,109}, {131,110},{131,113},{131,116},{131,119},{131,122}, {131,125},{131,126},{131,128},{131,129},{131,130}, {131,132},{131,134},{131,136}, {131,137},{131,138},{131,139},{131,140},{131,141}, {131,143},{131,147}, {129,74},{129,75} }; if (*p2 == 222 && IS_DAKU(*p1)) daku = TRUE; /* Daku-ten */ else if (*p2 == 223 && IS_HANDAKU(*p1)) handaku = TRUE; /* Han-daku-ten */ *p1 = mtable[c - 161][0]; *p2 = mtable[c - 161][1]; if (daku) { if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122)) (*p2)++; else if (*p2 == 131 && *p2 == 69) *p2 = 148; } else if (handaku && *p2 >= 110 && *p2 <= 122) (*p2) += 2; } /* Recast strcpy to handle unsigned chars used below. */ #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B))) #ifdef __STDC__ static void do_convert(unsigned char *to, unsigned char *from, const char *code) #else static do_convert(to, from, code) unsigned char *to, *from; char *code; #endif { #ifdef HAVE_ICONV iconv_t cd; size_t from_len, to_len; if ((cd = iconv_open(EUCSTR, code)) == (iconv_t)-1) { error("iconv_open() error"); #ifdef HAVE_ERRNO_H if (errno == EINVAL) error("invalid code specification: \"%s\" or \"%s\"", EUCSTR, code); #endif strcpy(to, from); return; } from_len = strlen((const char *)from) +1; to_len = BUFSIZ; if (iconv(cd, (const char **)&from, &from_len, (char **)&to, &to_len) == -1) { #ifdef HAVE_ERRNO_H if (errno == EINVAL) error("invalid end of input string"); else if (errno == EILSEQ) error("invalid code in input string"); else if (errno == E2BIG) error("output buffer overflow at do_convert()"); else #endif error("something happen"); strcpy(to, from); return; } if (iconv_close(cd) != 0) { error("iconv_close() error"); } #else int p1, p2, i, j; int jisx0208 = FALSE; int hankaku = FALSE; j = 0; if (strcmp(code, NEWJISSTR) == 0 || strcmp(code, OLDJISSTR) == 0){ for(i=0; from[i] != '\0' && j < BUFSIZ; i++){ if (from[i] == ESC) { i++; if (from[i] == '$') { jisx0208 = TRUE; hankaku = FALSE; i++; }else if (from[i] == '(') { jisx0208 = FALSE; i++; if (from[i] == 'I') /* Hankaku Kana */ hankaku = TRUE; else hankaku = FALSE; } } else { if (jisx0208) to[j++] = from[i] + 128; else if (hankaku) { to[j++] = SS2; to[j++] = from[i] + 128; } else to[j++] = from[i]; } } } else if (strcmp(code, SJISSTR) == 0) { for(i=0; from[i] != '\0' && j < BUFSIZ; i++){ p1 = from[i]; if (p1 < 127) to[j++] = p1; else if ((p1 >= 161) && (p1 <= 223)) { /* Hankaku Kana */ to[j++] = SS2; to[j++] = p1; } else { p2 = from[++i]; SJIStoJIS(&p1, &p2); to[j++] = p1 + 128; to[j++] = p2 + 128; } } } else { error("invalid code specification: \"%s\"", code); return; } if (j >= BUFSIZ) { error("output buffer overflow at do_convert()"); ustrcpy(to, from); } else to[j] = '\0'; #endif /* HAVE_ICONV */ } #ifdef __STDC__ static int do_check_and_conv(unsigned char *to, unsigned char *from) #else static int do_check_and_conv(to, from) unsigned char *to, *from; #endif { static unsigned char tmp[BUFSIZ]; int p1, p2, i, j; int kanji = TRUE; switch (DetectKanjiCode(from)){ case NEW: debug("Kanji code is New JIS."); do_convert(tmp, from, NEWJISSTR); break; case OLD: debug("Kanji code is Old JIS."); do_convert(tmp, from, OLDJISSTR); break; case ESCI: debug("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I."); do_convert(tmp, from, NEWJISSTR); break; case NEC: debug("Kanji code is NEC Kanji."); error("cannot convert NEC Kanji."); ustrcpy(tmp, from); kanji = FALSE; break; case EUC: debug("Kanji code is EUC."); ustrcpy(tmp, from); break; case SJIS: debug("Kanji code is SJIS."); do_convert(tmp, from, SJISSTR); break; case EUCORSJIS: debug("Kanji code is EUC or SJIS."); ustrcpy(tmp, from); kanji = FALSE; break; case ASCII: debug("This is ASCII string."); ustrcpy(tmp, from); kanji = FALSE; break; default: debug("This string includes unknown code."); ustrcpy(tmp, from); kanji = FALSE; break; } /* Hankaku Kana ---> Zenkaku Kana */ if (kanji) { j = 0; for(i = 0; tmp[i] != '\0'&& j < BUFSIZ; i++) { if (tmp[i] == SS2) { p1 = tmp[++i]; if (tmp[i+1] == SS2) { p2 = tmp[i+2]; if (p2 == 222 || p2 == 223) i += 2; else p2 = 0; } else p2 = 0; han2zen(&p1, &p2); SJIStoJIS(&p1, &p2); to[j++] = p1 + 128; to[j++] = p2 + 128; } else to[j++] = tmp[i]; } if (j >= BUFSIZ) { error("output buffer overflow at Hankaku --> Zenkaku"); ustrcpy(to, tmp); } else to[j] = '\0'; } else ustrcpy(to, tmp); return kanji; } #ifdef __STDC__ int any2eucjp(unsigned char *dest, unsigned char *src, unsigned int dest_max) #else int any2eucjp(dest, src, dest_max) unsigned char *dest, *src; unsigned int dest_max; #endif { static unsigned char tmp_dest[BUFSIZ]; int ret; if (strlen((const char *)src) >= BUFSIZ) { error("input string too large"); return -1; } if (dest_max > BUFSIZ) { error("invalid maximum size of destination\nit should be less than %d.", BUFSIZ); return -1; } ret = do_check_and_conv(tmp_dest, src); if (strlen((const char *)tmp_dest) >= dest_max) { error("output buffer overflow"); ustrcpy(dest, src); return -1; } ustrcpy(dest, tmp_dest); return ret; } #if 0 #ifdef __STDC__ unsigned int strwidth(unsigned char *s) #else unsigned int strwidth(s) unsigned char *s; #endif { unsigned char *t; unsigned int i; t = (unsigned char *)gdMalloc(BUFSIZ); any2eucjp(t, s, BUFSIZ); i = strlen(t); gdFree(t); return i; } #endif #ifdef DEBUG int main() { unsigned char input[BUFSIZ]; unsigned char *output; unsigned char *str; int c, i = 0; while ( (c = fgetc(stdin)) != '\n' && i < BUFSIZ ) input[i++] = c; input[i] = '\0'; printf("input : %d bytes\n", strlen(input)); printf("output: %d bytes\n", strwidth(input)); output = (unsigned char *)gdMalloc(BUFSIZ); any2eucjp(output, input, BUFSIZ); str = output; while(*str != '\0') putchar(*(str++)); putchar('\n'); gdFree(output); return 0; } #endif