diff options
author | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-08-03 01:48:35 +0000 |
---|---|---|
committer | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-08-03 01:48:35 +0000 |
commit | dcd966ba50fa18853c5ae06125a5b08b0ee6b10d (patch) | |
tree | 8147928dbe65fc6b4d83e5cc15d1b3ac5993e0eb /src | |
parent | 8b75f9bb6a09d54d634ff661655659951378aa2c (diff) | |
download | librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.gz librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.bz2 librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.xz librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.zip |
Language Fixes and Improvements
- Added rccmutex, a small mutex wrapper (pthread-based when available, with a polling fallback otherwise)
- Language autodetection fixes and improvements
- Language translation fixes and improvements
- The current state is close to being usable
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 5 | ||||
-rw-r--r-- | src/rccexternal.c | 9 | ||||
-rw-r--r-- | src/rccexternal.h | 2 | ||||
-rw-r--r-- | src/rccmutex.c | 73 | ||||
-rw-r--r-- | src/rccmutex.h | 27 | ||||
-rw-r--r-- | src/rccstring.c | 1 | ||||
-rw-r--r-- | src/rcctranslate.c | 133 | ||||
-rw-r--r-- | src/rcctranslate.h | 8 | ||||
-rw-r--r-- | src/recode.c | 162 |
9 files changed, 344 insertions, 76 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 4ba3c35..0a1fdc1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -7,6 +7,7 @@ librcc_la_SOURCES = librcc.c \ curconfig.c curconfig.h \ rccconfig.c rccconfig.h \ rcclist.c rcclist.h \ + rccmutex.c rccmutex.h \ plugin.c plugin.h \ rccexternal.c rccexternal.h \ fake_enca.h fake_rcd.h \ @@ -23,7 +24,7 @@ librcc_la_SOURCES = librcc.c \ internal.h include_HEADERS = librcc.h -AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ +AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ @PTHREAD_CFLAGS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@ librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/rccexternal.c b/src/rccexternal.c index 4a09948..6a81c56 100644 --- a/src/rccexternal.c +++ b/src/rccexternal.c @@ -42,6 +42,7 @@ #include "internal.h" #define RCC_EXT_PROG_NAME "rccexternal" +#define RCC_EXTERNAL_TIMEOUT 250 /* 100us */ static pid_t pid = (pid_t)-1; static char *addr = NULL; @@ -88,9 +89,13 @@ void rccExternalFree() { } static int rccExternalSetDeadline(struct timeval *tv, unsigned long timeout) { +/* gettimeofday(tv, NULL); tv->tv_sec += (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) / 1000000; tv->tv_usec = (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) % 1000000; +*/ + tv->tv_sec = (timeout + RCC_EXTERNAL_TIMEOUT) / 1000000; + tv->tv_usec = (timeout + RCC_EXTERNAL_TIMEOUT) % 1000000; return 0; } @@ -103,7 +108,7 @@ size_t rccExternalWrite(int s, const char *buffer, ssize_t size, unsigned long t if (s == -1) return -1; - for (writed = 0; (writed < size)&&(connected); writed += connected?res:0) { + for (writed = 0; ((writed < 
size)&&(connected)); writed += connected?res:0) { FD_ZERO(&fdcon); FD_SET(s, &fdcon); rccExternalSetDeadline(&tv, timeout); @@ -127,7 +132,7 @@ size_t rccExternalRead(int s, char *buffer, ssize_t size, unsigned long timeout) if (s == -1) return -1; - for (readed = 0; (readed < size)&&(connected); readed += connected?res:0) { + for (readed = 0; ((readed < size)&&(connected)); readed += connected?res:0) { FD_ZERO(&fdcon); FD_SET(s, &fdcon); rccExternalSetDeadline(&tv, timeout); diff --git a/src/rccexternal.h b/src/rccexternal.h index bffd6b3..236e2df 100644 --- a/src/rccexternal.h +++ b/src/rccexternal.h @@ -1,8 +1,6 @@ #ifndef _RCC_EXTERNAL_H #define _RCC_EXTERNAL_H -#define RCC_EXTERNAL_TIMEOUT 1000000 - typedef enum rcc_external_module_t { RCC_EXTERNAL_MODULE_CONTROL = 0, RCC_EXTERNAL_MODULE_LIBRTRANSLATE, diff --git a/src/rccmutex.c b/src/rccmutex.c new file mode 100644 index 0000000..e2690fa --- /dev/null +++ b/src/rccmutex.c @@ -0,0 +1,73 @@ +#include <stdlib.h> +#include <time.h> + +#include "rccmutex.h" + +#define RCC_MUTEX_SLEEP 500 + +rcc_mutex rccMutexCreate() { + rcc_mutex mutex; + + mutex = (rcc_mutex)malloc(sizeof(rcc_mutex_s)); + if (mutex) { +#ifdef HAVE_PTHREAD + pthread_mutex_init(&mutex->mutex, NULL); +#else + mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ + } + return mutex; +} + +void rccMutexFree(rcc_mutex mutex) { + if (mutex) { +#ifdef HAVE_PTHREAD + pthread_mutex_destroy(&mutex->mutex); +#endif /* HAVE_PTHREAD */ + free(mutex); + } +} + +int rccMutexLock(rcc_mutex mutex) { +#ifndef HAVE_PTHREAD + struct timespec ts; +#endif /* !HAVE_PTHREAD */ + + if (!mutex) return -1; + +#ifdef HAVE_PTHREAD + return pthread_mutex_lock(&mutex->mutex); +#else + while (mutex->mutex) { + ts.tv_sec = RCC_MUTEX_SLEEP / 1000000; + ts.tv_nsec = (RCC_MUTEX_SLEEP % 1000000)*1000; + nanosleep(&ts, NULL); + } + mutex->mutex = 1; + + return 0; +#endif /* HAVE_PTHREAD */ +} + +int rccMutexTryLock(rcc_mutex mutex) { + if (!mutex) return -1; + +#ifdef HAVE_PTHREAD + return 
pthread_mutex_trylock(&mutex->mutex); +#else + if (mutex->mutex) return -1; + mutex->mutex = 1; + return 0; +#endif /* HAVE_PTHREAD */ +} + +void rccMutexUnLock(rcc_mutex mutex) { + if (!mutex) return; +#ifdef HAVE_PTHREAD + pthread_mutex_unlock(&mutex->mutex); +#else + mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ +} + + diff --git a/src/rccmutex.h b/src/rccmutex.h new file mode 100644 index 0000000..8585621 --- /dev/null +++ b/src/rccmutex.h @@ -0,0 +1,27 @@ +#ifndef _RCC_MUTEX_H +#define _RCC_MUTEX_H + +#include "../config.h" + +#ifdef HAVE_PTHREAD +# include <pthread.h> +#endif /* HAVE_PTHREAD */ + +struct rcc_mutex_t { +#ifdef HAVE_PTHREAD + pthread_mutex_t mutex; +#else + unsigned char mutex; +#endif /* HAVE_PTHREAD */ +}; +typedef struct rcc_mutex_t rcc_mutex_s; +typedef struct rcc_mutex_t *rcc_mutex; + +rcc_mutex rccMutexCreate(); +void rccMutexFree(rcc_mutex mutex); + +int rccMutexLock(rcc_mutex mutex); +int rccMutexTryLock(rcc_mutex mutex); +void rccMutexUnLock(rcc_mutex mutex); + +#endif /* _RCC_MUTEX_H */ diff --git a/src/rccstring.c b/src/rccstring.c index 9c4c19f..aa92407 100644 --- a/src/rccstring.c +++ b/src/rccstring.c @@ -61,6 +61,7 @@ int rccStringFixID(rcc_string string, rcc_context ctx) { int rccStringChangeID(rcc_string string, rcc_language_id language_id) { if ((!string)&&(language_id != (rcc_language_id)-1)) return -1; +// printf("ChangingID %lu: %s\n", language_id, string); ((rcc_string_header*)string)->language_id = language_id; return 0; } diff --git a/src/rcctranslate.c b/src/rcctranslate.c index d7bb4e4..9dcf411 100644 --- a/src/rcctranslate.c +++ b/src/rcctranslate.c @@ -3,10 +3,12 @@ #include <string.h> #include "internal.h" +#include "rccconfig.h" #include "rccexternal.h" +#include "rccmutex.h" #include "rcctranslate.h" - +#define RCC_TRANSLATE_DEFAULT_TIMEOUT 1000000 /* 1s */ int rccTranslateInit() { @@ -26,18 +28,37 @@ rcc_translate rccTranslateOpen(const char *from, const char *to) { translate = 
(rcc_translate)malloc(sizeof(rcc_translate_s)); if (!translate) return NULL; + + translate->mutex = rccMutexCreate(); + translate->wmutex = rccMutexCreate(); + if ((!translate->mutex)||(!translate->wmutex)) { + if (translate->mutex) rccMutexFree(translate->mutex); + if (translate->wmutex) rccMutexFree(translate->wmutex); + free(translate); + return NULL; + } translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); if (translate->sock == -1) { + rccMutexFree(translate->mutex); + rccMutexFree(translate->wmutex); free(translate); return NULL; } translate->remaining = 0; + translate->werror = 0; + translate->prefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE; translate->prefix.cmd.size = sizeof(rcc_translate_prefix_s); memcpy(translate->prefix.from, from, 3*sizeof(char)); memcpy(translate->prefix.to, to, 3*sizeof(char)); + + translate->wprefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE; + translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s); + memcpy(translate->wprefix.from, from, 3*sizeof(char)); + memcpy(translate->wprefix.to, to, 3*sizeof(char)); + rccTranslateSetTimeout(translate, RCC_TRANSLATE_DEFAULT_TIMEOUT); return translate; @@ -50,18 +71,40 @@ void rccTranslateClose(rcc_translate translate) { #ifdef HAVE_LIBTRANSLATE if (!translate) return; if (translate->sock != -1) rccExternalClose(translate->sock); + rccMutexFree(translate->mutex); + rccMutexFree(translate->wmutex); free(translate); #endif /* HAVE_LIBTRANSLATE */ } int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) { -#ifdef HAVE_LIBTRANSLATE_TIMED_TRANSLATE if (!translate) return -1; translate->prefix.timeout = us; return 0; -#else - return -1; -#endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */ +} + +#define RCC_UNLOCK_W 1 +#define RCC_UNLOCK_R 2 +#define RCC_UNLOCK_RW 3 +#define RCC_UNLOCK_WR 3 +static char *rccTranslateReturn(rcc_translate translate, char *ret, int unlock) { + if (unlock&RCC_UNLOCK_R) rccMutexUnLock(translate->mutex); + if (unlock&RCC_UNLOCK_W) 
rccMutexUnLock(translate->wmutex); + return ret; +} +#define rccTranslateReturnNULL(translate, unlock) rccTranslateReturn(translate, NULL, unlock) + +static int rccTranslateQueue(rcc_translate translate, const char *buf) { + size_t len, err; + + + len = strlen(buf); + translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s) + len - sizeof(rcc_external_command_s); + + err = rccExternalWrite(translate->sock, (char*)&translate->wprefix, sizeof(rcc_translate_prefix_s) - 1, 0); + if (!err) err = rccExternalWrite(translate->sock, buf, len + 1, 0); + fsync(translate->sock); + return err?1:0; } char *rccTranslate(rcc_translate translate, const char *buf) { @@ -69,27 +112,57 @@ char *rccTranslate(rcc_translate translate, const char *buf) { rcc_external_command_s resp; size_t err, len; char *buffer; -/* size_t i; -*/ - + if ((!translate)||(!buf)) return NULL; -/* - if (!strcmp(translate->prefix.to, "en")) { - for (i=0;buf[i];i++) + if (!strcmp(translate->prefix.to, rcc_english_language_sn)) { + for (i=0;buf[i];i++) { if ((unsigned char)buf[i]>0x7F) break; + if ((buf[i]>='A')&&(buf[i]<='Z')) break; + if ((buf[i]>='a')&&(buf[i]<='z')) break; + } if (!buf[i]) return NULL; } -*/ + + rccMutexLock(translate->wmutex); + + if (rccMutexTryLock(translate->mutex)) { + if ((translate->werror)||(translate->sock == -1)) return rccTranslateReturnNULL(translate,RCC_UNLOCK_W); + + if (rccTranslateQueue(translate, buf)) translate->werror = 1; + return rccTranslateReturnNULL(translate, RCC_UNLOCK_W); + } + + if (translate->werror) { + rccExternalClose(translate->sock); + translate->sock = -1; + translate->werror = 0; + } if (translate->sock == -1) { translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); - if (translate->sock == -1) return NULL; + if (translate->sock == -1) { + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); + } else { + translate->werror = 0; + translate->remaining = 0; + } } else if (translate->remaining) { if (translate->remaining == (size_t)-1) 
{ err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), 0); - if (err) return NULL; + if (err) { + if (err == sizeof(rcc_external_command_s)) { + if (rccTranslateQueue(translate, buf)) { + rccExternalClose(translate->sock); + translate->sock = -1; + } + } else { + rccExternalClose(translate->sock); + translate->sock = -1; + } + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); + } translate->remaining = resp.size; } @@ -97,13 +170,18 @@ char *rccTranslate(rcc_translate translate, const char *buf) { if (!buffer) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } + err = rccExternalRead(translate->sock, buffer, translate->remaining, 0); free(buffer); if (err) { translate->remaining = err; - return NULL; + if (rccTranslateQueue(translate, buf)) { + rccExternalClose(translate->sock); + translate->sock = -1; + } + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } translate->remaining = 0; } @@ -114,41 +192,50 @@ char *rccTranslate(rcc_translate translate, const char *buf) { if (err) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } err = rccExternalWrite(translate->sock, buf, len + 1, 0); if (err) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } + rccMutexUnLock(translate->wmutex); err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), translate->prefix.timeout); if (err) { if (err == sizeof(rcc_external_command_s)) { translate->remaining = (size_t)-1; } else { + rccMutexLock(translate->wmutex); rccExternalClose(translate->sock); translate->sock = -1; + rccMutexUnLock(translate->wmutex); } - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } - if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) return NULL; + + if 
((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); buffer = (char*)malloc(resp.size*sizeof(char)); if (!buffer) { + rccMutexLock(translate->wmutex); rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + rccMutexUnLock(translate->wmutex); + + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } + err = rccExternalRead(translate->sock, buffer, resp.size, 0); if (err) { translate->remaining = err; free(buffer); - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } - return buffer; + return rccTranslateReturn(translate, buffer, RCC_UNLOCK_R); #else return NULL; #endif /* HAVE_LIBTRANSLATE */ diff --git a/src/rcctranslate.h b/src/rcctranslate.h index 961af6f..b00cdfd 100644 --- a/src/rcctranslate.h +++ b/src/rcctranslate.h @@ -1,9 +1,10 @@ #ifndef _RCC_TRANSLATE_H #define _RCC_TRANSLATE_H +#include "rccmutex.h" #include "rccexternal.h" -#define RCC_TRANSLATE_DEFAULT_TIMEOUT 5000000 /* 5s */ #define RCC_EXTERNAL_COMMAND_TRANSLATE 0x80 +#define RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE 0x81 struct rcc_translate_prefix_t { @@ -19,8 +20,13 @@ typedef struct rcc_translate_prefix_t *rcc_translate_prefix; struct rcc_translate_t { rcc_translate_prefix_s prefix; + rcc_translate_prefix_s wprefix; size_t remaining; + rcc_mutex mutex; + rcc_mutex wmutex; int sock; + + unsigned char werror; }; typedef struct rcc_translate_t rcc_translate_s; diff --git a/src/recode.c b/src/recode.c index 7e12343..d337164 100644 --- a/src/recode.c +++ b/src/recode.c @@ -15,21 +15,34 @@ #include "rccspell.h" #define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a')))) -#define RCC_REQUIRED_PROBABILITY 0.66 +#define RCC_PROBABILITY_STEP 0.10 +#define RCC_REQUIRED_PROBABILITY 0.33 +#define RCC_REQUIRED_LENGTH 5 +#define RCC_ACCEPTABLE_PROBABILITY 0 +#define RCC_ACCEPTABLE_LENGTH 3 rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t 
len, rcc_string *retstring) { - rcc_speller speller; + rcc_speller speller = NULL, english_speller = NULL; unsigned long i, nlanguages; rcc_language_config config, config0 = NULL; rcc_string recoded; unsigned char *utf8; size_t j, mode; - unsigned long words, english, result; + unsigned long spres, words, english, result; + size_t longest; unsigned char english_mode, english_word = 1; + char *english_string = NULL; rcc_language_id english_lang = (rcc_language_id)-1; + size_t english_longest = 0; + unsigned char is_english_string = 1; double res, english_res = 0; rcc_option_value usedb4; - + rcc_language_id bestlang = (rcc_language_id)-1; + unsigned long bestlongest = RCC_ACCEPTABLE_LENGTH; + double bestres = RCC_ACCEPTABLE_PROBABILITY; + char *best_string = NULL; + + unsigned long accepted_nonenglish_langs = 0; usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); @@ -50,6 +63,15 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id nlanguages = ctx->n_languages; + english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn); + if (english_lang != (rcc_language_id)-1) { + config = rccGetUsableConfig(ctx, english_lang); + if (config) { + english_speller = rccConfigGetSpeller(config); + if (rccSpellerGetError(english_speller)) english_speller = NULL; + } + } + for (i=0;i<nlanguages;i++) { config = rccGetUsableConfig(ctx, (rcc_language_id)i); if (!config) continue; @@ -68,11 +90,20 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id else english_mode = 0; utf8 = (char*)rccStringGetString(recoded); - for (result=0,english=0,words=0,mode=0,j=0;utf8[j];j++) { + printf("%s\n", config->language->sn); + + for (result=0,english=0,words=0,longest=0,mode=0,j=0;utf8[j];j++) { if (isSpace(utf8[j])) { if (mode) { - if ((!english_mode)&&(english_word)) english++; - result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; + if ((!english_mode)&&(english_word)&&(rccSpellerSized(english_speller, utf8 + 
mode -1, j - mode + 1))) + english++; + else { + if ((english_mode)&&(!english_word)) is_english_string = 0; + spres = rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; + printf("%.*s %s\n",j-mode+1,utf8+mode-1, spres?"<======":""); + if ((spres)&&((j - mode + 1)>longest)) longest = j - mode + 1; + result+=spres; + } words++; mode = 0; } else continue; @@ -85,40 +116,89 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id } } } + if (mode) { - result+=rccSpeller(speller, utf8 + mode - 1)?1:0; + if ((!english_mode)&&(english_word)&&(rccSpeller(english_speller, utf8 + mode -1))) + english++; + else { + if ((english_mode)&&(!english_word)) is_english_string = 0; + spres = rccSpeller(speller, utf8 + mode - 1)?1:0; + if ((spres)&&((j-mode+1)>longest)) longest = j - mode + 1; + result += spres; + } words++; } if (english_mode) { + if (english_string) free(english_string); + printf("%u %u\n", result, words); + english_res = 1.*result/words; - english_lang = (rcc_language_id)i; - } else if (words) { - res = 1.*result/words; - if (res > RCC_REQUIRED_PROBABILITY) { + english_lang = (rcc_language_id)i; + english_longest = longest; + english_string = recoded; + } else if (words>english) { + res = 1.*result/(words - english); + printf("%u %u %u\n", result, words, english); + if ((res > RCC_REQUIRED_PROBABILITY)&&(longest > RCC_REQUIRED_LENGTH)) { + if (best_string) free(best_string); + if (english_string) free(english_string); + if (retstring) *retstring = recoded; else free(recoded); return (rcc_language_id)i; - } - if (words > english) { - res = 1.*(result - english)/(words - english); - if (res > RCC_REQUIRED_PROBABILITY) { - if (retstring) *retstring = recoded; - else free(recoded); - return (rcc_language_id)i; - } - } - } - - free(recoded); + } else if ((res > bestres + RCC_PROBABILITY_STEP)|| + ((res > bestres - RCC_PROBABILITY_STEP)&&(longest > bestlongest))|| + ((res > bestres)&&(longest == bestlongest))) { + + if (best_string) 
free(best_string); + + bestres = res; + bestlang = (rcc_language_id)i; + bestlongest = longest; + best_string = recoded; + } else if (!accepted_nonenglish_langs) { + bestlang = (rcc_language_id)i; + best_string = recoded; + } else free(recoded); + + accepted_nonenglish_langs++; + } else free(recoded); } - if (english_res > RCC_REQUIRED_PROBABILITY) { - if (retstring) { - *retstring = rccCreateString(english_lang, buf, len); - } + if ((is_english_string)&&(english_res > RCC_REQUIRED_PROBABILITY)&&(english_longest > RCC_REQUIRED_LENGTH)) { + if (best_string) free(best_string); + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); return english_lang; } + + if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) { + if (english_string) free(english_string); + if (retstring) *retstring = best_string; + else if (best_string) free(best_string); + return bestlang; + } + + if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { + if (best_string) free(best_string); + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); + return english_lang; + } + + if (best_string) { + if (english_string) free(english_string); + if (retstring) *retstring = best_string; + else if (best_string) free(best_string); + return bestlang; + } else if (best_string) free(best_string); + + if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); + return english_lang; + } else if (english_string) free(english_string); return (rcc_language_id)-1; } @@ -206,9 +286,12 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, */ detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); - if (detected_language_id != (rcc_language_id)-1) return result; + if 
(detected_language_id != (rcc_language_id)-1) { + printf("Language %i: %s\n", rccStringGetLanguage(result), result); + return result; + } + - err = rccConfigure(ctx); if (err) return NULL; @@ -316,7 +399,6 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s } if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) { - puts("entrans"); if (!config->entrans) { config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn); } @@ -384,7 +466,6 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const const char *from_charset, *to_charset; rcc_charset_id from_charset_id, to_charset_id; rcc_class_type class_type; - rcc_option_value usedb4; if (!ctx) { if (rcc_default_ctx) ctx = rcc_default_ctx; @@ -394,20 +475,9 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const class_type = rccGetClassType(ctx, to); if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding; - if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding; - - usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); - if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { - stmp = rccDb4GetKey(ctx->db4ctx, buf, len); - if (stmp) { - if (rccStringFixID(stmp, ctx)) free(stmp); - else { - result = rccSizedTo(ctx, to, stmp, rlen); - free(stmp); - return result; - } - } - } + if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding; + if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding; + if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding; err = rccConfigure(ctx); if (err) return NULL; |