summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@dside.dyndns.org>2005-08-03 01:48:35 +0000
committerSuren A. Chilingaryan <csa@dside.dyndns.org>2005-08-03 01:48:35 +0000
commitdcd966ba50fa18853c5ae06125a5b08b0ee6b10d (patch)
tree8147928dbe65fc6b4d83e5cc15d1b3ac5993e0eb
parent8b75f9bb6a09d54d634ff661655659951378aa2c (diff)
downloadlibrcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.gz
librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.bz2
librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.tar.xz
librcc-dcd966ba50fa18853c5ae06125a5b08b0ee6b10d.zip
Language Fixes and Improvements
- rccmutex
- Language autodetection fixes and improvements
- Language translation fixes and improvements
- The current state is close to being usable
-rw-r--r--ToDo15
-rw-r--r--configure.in41
-rw-r--r--external/rcclibtranslate.c84
-rw-r--r--src/Makefile.am5
-rw-r--r--src/rccexternal.c9
-rw-r--r--src/rccexternal.h2
-rw-r--r--src/rccmutex.c73
-rw-r--r--src/rccmutex.h27
-rw-r--r--src/rccstring.c1
-rw-r--r--src/rcctranslate.c133
-rw-r--r--src/rcctranslate.h8
-rw-r--r--src/recode.c162
12 files changed, 473 insertions, 87 deletions
diff --git a/ToDo b/ToDo
index fdb843f..026888d 100644
--- a/ToDo
+++ b/ToDo
@@ -1,4 +1,13 @@
0.3.x:
+ - Buffer management:
+ + SetBufferSize ( 0 - autogrow )
+ - Language autodetection and translation improvements
+ + Look into offline translation libraries and other possibilities to improve
+ translation and language detection.
+ + Implement ispell support
+ + Configurable timeouts
+
+1.x:
- Common encodings:
+ Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE)
+ Special type of classes to select only from Unicode encodings (or even just specified subset of encodings)
@@ -6,10 +15,8 @@
* rccToEncoding(current_language, *new_language, buf, size)?
* rccFromEncoding(current_language, utf8_language, buf, size)?
* Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions)
- - Buffer managment:
- + SetBufferSize ( 0 - autogrow )
- - Look on ofline translation libraries and other possibilities to improove
- translation and language detection.
+ - Recoding options:
+ + Skip Translation
on request:
- Multibyte(not-UTF8) support for FS classes
diff --git a/configure.in b/configure.in
index 16051b5..013e9ae 100644
--- a/configure.in
+++ b/configure.in
@@ -201,6 +201,45 @@ AM_PATH_ASPELL([
HAVE_ASPELL=no
])
+
+PTHREAD_LIBS=error
+AC_EGREP_CPP(yes,[
+#if (defined(__FreeBSD_cc_version) && __FreeBSD_cc_version <= 500001) || defined(__OpenBSD__)
+ yes
+#endif
+],
+ PTHREAD_CFLAGS="-D_THREAD_SAFE"
+ PTHREAD_LIBS="-pthread"
+)
+if test "x$PTHREAD_LIBS" = "xerror"; then
+ AC_CHECK_LIB(pthread, pthread_mutex_lock, PTHREAD_LIBS="-lpthread")
+fi
+if test "x$PTHREAD_LIBS" = xerror; then
+ AC_CHECK_LIB(pthreads, pthread_mutex_lock, PTHREAD_LIBS="-lpthreads")
+fi
+if test "x$PTHREAD_LIBS" = xerror; then
+ AC_CHECK_LIB(c_r, pthread_mutex_lock, PTHREAD_LIBS="-lc_r")
+fi
+if test "x$PTHREAD_LIBS" = xerror; then
+ AC_CHECK_FUNC(pthread_mutex_lock, PTHREAD_LIBS="")
+fi
+if test "x$PTHREAD_LIBS" = xerror; then
+ PTHREAD_LIBS=""
+ PTHREAD_CFLAGS=""
+else
+ AC_CHECK_HEADER(pthread.h, [
+ AC_DEFINE(HAVE_PTHREAD,1,[Defines if pthread is available])
+ HAVE_PTHREAD=yes
+ ],[
+ HAVE_PTHREAD=no
+ PTHREAD_LIBS=""
+ PTHREAD_CFLAGS=""
+ ])
+fi
+AC_SUBST(PTHREAD_CFLAGS)
+AC_SUBST(PTHREAD_LIBS)
+
+
dnl Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
@@ -211,6 +250,8 @@ AC_OUTPUT(src/Makefile engines/Makefile external/Makefile ui/Makefile examples/M
echo ""
echo "Configuration:"
+echo " POSIX Threading Support: $HAVE_PTHREAD"
+echo ""
echo " Dynamic Engine Loading Support: $HAVE_DLOPEN"
echo " Enca Charset Detection Support: $HAVE_ENCA"
echo " LibRCD Charset Detection Support: $HAVE_RCD"
diff --git a/external/rcclibtranslate.c b/external/rcclibtranslate.c
index 56ce8a2..46fcd6e 100644
--- a/external/rcclibtranslate.c
+++ b/external/rcclibtranslate.c
@@ -45,11 +45,34 @@ static char *rccCreateKey(const char *from, const char *to, const char *data, si
return res;
}
+static char *rccTranslateFixEOL(char *result, const char *text) {
+ size_t i,j;
+ char *res;
+
+ if (!result) return result;
+ if (strstr(text, "\r\n")) return result;
+
+ res = (char*)malloc((strlen(result)+1)*sizeof(char));
+ if (!res) {
+ free(result);
+ return NULL;
+ }
+
+ for (i=0, j=0;result[i];i++) {
+ if ((result[i]=='\r')&&(result[i+1]=='\n')) i++;
+ else res[j++] = result[i];
+ }
+ res[j] = 0;
+ free(result);
+ return res;
+}
+
static void *rccLibPostponed(void *info) {
char *result;
char *data;
char from[3];
char to[3];
+ size_t datalen;
from[2] = 0;
to[2] = 0;
@@ -60,13 +83,21 @@ static void *rccLibPostponed(void *info) {
if (data) {
g_mutex_unlock(mutex);
+ datalen = strlen(data);
+
memcpy(from, data, 2);
memcpy(to, data + 2, 2);
- result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL);
- if (result) {
- rccDb4SetKey(db4ctx, data, strlen(data), result);
- free(result);
+ result = rccDb4GetKey(db4ctx, data, datalen);
+ if (result) free(result);
+ else {
+ result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL);
+
+ if (result) {
+ result = rccTranslateFixEOL(result, data+4);
+ rccDb4SetKey(db4ctx, data, datalen, result);
+ free(result);
+ }
}
free(data);
@@ -164,6 +195,26 @@ void rccLibTranslateFree() {
}
+static void rccLibTranslateQueue(const char *from, const char *to, const char *text) {
+#ifdef HAVE_LIBTRANSLATE
+ char *key = NULL;
+ size_t keysize;
+
+ if ((!session)||(!from)||(!to)||(!text)) return;
+ if ((strlen(from)!=2)||(strlen(to)!=2)) return;
+
+ if (db4ctx) {
+ key = rccCreateKey(from,to,text,&keysize);
+ if (key) {
+ g_mutex_lock(mutex);
+ g_queue_push_tail(queue, key);
+ g_mutex_unlock(mutex);
+ g_cond_signal(cond);
+ }
+ }
+#endif /* HAVE_LIBTRANSLATE */
+}
+
static char *rccLibTranslateDo(const char *from, const char *to, const char *text, unsigned long timeout) {
#ifdef HAVE_LIBTRANSLATE
char *result;
@@ -188,6 +239,8 @@ static char *rccLibTranslateDo(const char *from, const char *to, const char *tex
# else
result = translate_session_translate_text(session, text, from, to, NULL, NULL, NULL);
# endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */
+
+ result = rccTranslateFixEOL(result, text);
if ((db4ctx)&&(key)) {
if (result) {
@@ -242,6 +295,7 @@ void *rccLibTranslate(void *info) {
res = read(s, buffer + readed, size - readed);
if (res<=0) connected = 0;
}
+ if (!connected) goto clear;
prefix.cmd.cmd = 0;
prefix.cmd.size = 0;
@@ -264,14 +318,30 @@ respond:
} else connected = 0;
if (prefix.cmd.size) free(translated);
+clear:
+ free(buffer);
+ } else connected = 0;
+ break;
+ case RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE:
+ size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s);
+ buffer = (char*)malloc(size);
+ if (buffer) {
+ for (readed = 0; (readed < size)&&(connected); readed += res) {
+ res = read(s, buffer + readed, size - readed);
+ if (res<=0) connected = 0;
+ }
+ if ((connected)&&(!prefix.from[2])&&(!prefix.to[2])&&(!buffer[readed-1])) {
+ rccLibTranslateQueue(prefix.from, prefix.to, buffer);
+ }
free(buffer);
} else connected = 0;
break;
default:
- buffer = (char*)malloc(prefix.cmd.size);
+ size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s);
+ buffer = (char*)malloc(size);
if (buffer) {
- for (readed = 0; (readed < prefix.cmd.size)&&(connected); readed += res) {
- res = read(s, buffer + readed, prefix.cmd.size - readed);
+ for (readed = 0; (readed < size)&&(connected); readed += res) {
+ res = read(s, buffer + readed, size - readed);
if (res<=0) connected = 0;
}
free(buffer);
diff --git a/src/Makefile.am b/src/Makefile.am
index 4ba3c35..0a1fdc1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -7,6 +7,7 @@ librcc_la_SOURCES = librcc.c \
curconfig.c curconfig.h \
rccconfig.c rccconfig.h \
rcclist.c rcclist.h \
+ rccmutex.c rccmutex.h \
plugin.c plugin.h \
rccexternal.c rccexternal.h \
fake_enca.h fake_rcd.h \
@@ -23,7 +24,7 @@ librcc_la_SOURCES = librcc.c \
internal.h
include_HEADERS = librcc.h
-AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@
-librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@
+AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ @PTHREAD_CFLAGS@
+librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@
librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@
diff --git a/src/rccexternal.c b/src/rccexternal.c
index 4a09948..6a81c56 100644
--- a/src/rccexternal.c
+++ b/src/rccexternal.c
@@ -42,6 +42,7 @@
#include "internal.h"
#define RCC_EXT_PROG_NAME "rccexternal"
+#define RCC_EXTERNAL_TIMEOUT 250 /* 250us */
static pid_t pid = (pid_t)-1;
static char *addr = NULL;
@@ -88,9 +89,13 @@ void rccExternalFree() {
}
static int rccExternalSetDeadline(struct timeval *tv, unsigned long timeout) {
+/*
gettimeofday(tv, NULL);
tv->tv_sec += (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) / 1000000;
tv->tv_usec = (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) % 1000000;
+*/
+ tv->tv_sec = (timeout + RCC_EXTERNAL_TIMEOUT) / 1000000;
+ tv->tv_usec = (timeout + RCC_EXTERNAL_TIMEOUT) % 1000000;
return 0;
}
@@ -103,7 +108,7 @@ size_t rccExternalWrite(int s, const char *buffer, ssize_t size, unsigned long t
if (s == -1) return -1;
- for (writed = 0; (writed < size)&&(connected); writed += connected?res:0) {
+ for (writed = 0; ((writed < size)&&(connected)); writed += connected?res:0) {
FD_ZERO(&fdcon);
FD_SET(s, &fdcon);
rccExternalSetDeadline(&tv, timeout);
@@ -127,7 +132,7 @@ size_t rccExternalRead(int s, char *buffer, ssize_t size, unsigned long timeout)
if (s == -1) return -1;
- for (readed = 0; (readed < size)&&(connected); readed += connected?res:0) {
+ for (readed = 0; ((readed < size)&&(connected)); readed += connected?res:0) {
FD_ZERO(&fdcon);
FD_SET(s, &fdcon);
rccExternalSetDeadline(&tv, timeout);
diff --git a/src/rccexternal.h b/src/rccexternal.h
index bffd6b3..236e2df 100644
--- a/src/rccexternal.h
+++ b/src/rccexternal.h
@@ -1,8 +1,6 @@
#ifndef _RCC_EXTERNAL_H
#define _RCC_EXTERNAL_H
-#define RCC_EXTERNAL_TIMEOUT 1000000
-
typedef enum rcc_external_module_t {
RCC_EXTERNAL_MODULE_CONTROL = 0,
RCC_EXTERNAL_MODULE_LIBRTRANSLATE,
diff --git a/src/rccmutex.c b/src/rccmutex.c
new file mode 100644
index 0000000..e2690fa
--- /dev/null
+++ b/src/rccmutex.c
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+#include <time.h>
+
+#include "rccmutex.h"
+
+#define RCC_MUTEX_SLEEP 500
+
+rcc_mutex rccMutexCreate() {
+ rcc_mutex mutex;
+
+ mutex = (rcc_mutex)malloc(sizeof(rcc_mutex_s));
+ if (mutex) {
+#ifdef HAVE_PTHREAD
+ pthread_mutex_init(&mutex->mutex, NULL);
+#else
+ mutex->mutex = 0;
+#endif /* HAVE_PTHREAD */
+ }
+ return mutex;
+}
+
+void rccMutexFree(rcc_mutex mutex) {
+ if (mutex) {
+#ifdef HAVE_PTHREAD
+ pthread_mutex_destroy(&mutex->mutex);
+#endif /* HAVE_PTHREAD */
+ free(mutex);
+ }
+}
+
+int rccMutexLock(rcc_mutex mutex) {
+#ifndef HAVE_PTHREAD
+ struct timespec ts;
+#endif /* !HAVE_PTHREAD */
+
+ if (!mutex) return -1;
+
+#ifdef HAVE_PTHREAD
+ return pthread_mutex_lock(&mutex->mutex);
+#else
+ while (mutex->mutex) {
+ ts.tv_sec = RCC_MUTEX_SLEEP / 1000000;
+ ts.tv_nsec = (RCC_MUTEX_SLEEP % 1000000)*1000;
+ nanosleep(&ts, NULL);
+ }
+ mutex->mutex = 1;
+
+ return 0;
+#endif /* HAVE_PTHREAD */
+}
+
+int rccMutexTryLock(rcc_mutex mutex) {
+ if (!mutex) return -1;
+
+#ifdef HAVE_PTHREAD
+ return pthread_mutex_trylock(&mutex->mutex);
+#else
+ if (mutex->mutex) return -1;
+ mutex->mutex = 1;
+ return 0;
+#endif /* HAVE_PTHREAD */
+}
+
+void rccMutexUnLock(rcc_mutex mutex) {
+ if (!mutex) return;
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&mutex->mutex);
+#else
+ mutex->mutex = 0;
+#endif /* HAVE_PTHREAD */
+}
+
+
diff --git a/src/rccmutex.h b/src/rccmutex.h
new file mode 100644
index 0000000..8585621
--- /dev/null
+++ b/src/rccmutex.h
@@ -0,0 +1,27 @@
+#ifndef _RCC_MUTEX_H
+#define _RCC_MUTEX_H
+
+#include "../config.h"
+
+#ifdef HAVE_PTHREAD
+# include <pthread.h>
+#endif /* HAVE_PTHREAD */
+
+struct rcc_mutex_t {
+#ifdef HAVE_PTHREAD
+ pthread_mutex_t mutex;
+#else
+ unsigned char mutex;
+#endif /* HAVE_PTHREAD */
+};
+typedef struct rcc_mutex_t rcc_mutex_s;
+typedef struct rcc_mutex_t *rcc_mutex;
+
+rcc_mutex rccMutexCreate();
+void rccMutexFree(rcc_mutex mutex);
+
+int rccMutexLock(rcc_mutex mutex);
+int rccMutexTryLock(rcc_mutex mutex);
+void rccMutexUnLock(rcc_mutex mutex);
+
+#endif /* _RCC_MUTEX_H */
diff --git a/src/rccstring.c b/src/rccstring.c
index 9c4c19f..aa92407 100644
--- a/src/rccstring.c
+++ b/src/rccstring.c
@@ -61,6 +61,7 @@ int rccStringFixID(rcc_string string, rcc_context ctx) {
int rccStringChangeID(rcc_string string, rcc_language_id language_id) {
if ((!string)&&(language_id != (rcc_language_id)-1)) return -1;
+// printf("ChangingID %lu: %s\n", language_id, string);
((rcc_string_header*)string)->language_id = language_id;
return 0;
}
diff --git a/src/rcctranslate.c b/src/rcctranslate.c
index d7bb4e4..9dcf411 100644
--- a/src/rcctranslate.c
+++ b/src/rcctranslate.c
@@ -3,10 +3,12 @@
#include <string.h>
#include "internal.h"
+#include "rccconfig.h"
#include "rccexternal.h"
+#include "rccmutex.h"
#include "rcctranslate.h"
-
+#define RCC_TRANSLATE_DEFAULT_TIMEOUT 1000000 /* 1s */
int rccTranslateInit() {
@@ -26,18 +28,37 @@ rcc_translate rccTranslateOpen(const char *from, const char *to) {
translate = (rcc_translate)malloc(sizeof(rcc_translate_s));
if (!translate) return NULL;
+
+ translate->mutex = rccMutexCreate();
+ translate->wmutex = rccMutexCreate();
+ if ((!translate->mutex)||(!translate->wmutex)) {
+ if (translate->mutex) rccMutexFree(translate->mutex);
+ if (translate->wmutex) rccMutexFree(translate->wmutex);
+ free(translate);
+ return NULL;
+ }
translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE);
if (translate->sock == -1) {
+ rccMutexFree(translate->mutex);
+ rccMutexFree(translate->wmutex);
free(translate);
return NULL;
}
translate->remaining = 0;
+ translate->werror = 0;
+
translate->prefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE;
translate->prefix.cmd.size = sizeof(rcc_translate_prefix_s);
memcpy(translate->prefix.from, from, 3*sizeof(char));
memcpy(translate->prefix.to, to, 3*sizeof(char));
+
+ translate->wprefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE;
+ translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s);
+ memcpy(translate->wprefix.from, from, 3*sizeof(char));
+ memcpy(translate->wprefix.to, to, 3*sizeof(char));
+
rccTranslateSetTimeout(translate, RCC_TRANSLATE_DEFAULT_TIMEOUT);
return translate;
@@ -50,18 +71,40 @@ void rccTranslateClose(rcc_translate translate) {
#ifdef HAVE_LIBTRANSLATE
if (!translate) return;
if (translate->sock != -1) rccExternalClose(translate->sock);
+ rccMutexFree(translate->mutex);
+ rccMutexFree(translate->wmutex);
free(translate);
#endif /* HAVE_LIBTRANSLATE */
}
int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) {
-#ifdef HAVE_LIBTRANSLATE_TIMED_TRANSLATE
if (!translate) return -1;
translate->prefix.timeout = us;
return 0;
-#else
- return -1;
-#endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */
+}
+
+#define RCC_UNLOCK_W 1
+#define RCC_UNLOCK_R 2
+#define RCC_UNLOCK_RW 3
+#define RCC_UNLOCK_WR 3
+static char *rccTranslateReturn(rcc_translate translate, char *ret, int unlock) {
+ if (unlock&RCC_UNLOCK_R) rccMutexUnLock(translate->mutex);
+ if (unlock&RCC_UNLOCK_W) rccMutexUnLock(translate->wmutex);
+ return ret;
+}
+#define rccTranslateReturnNULL(translate, unlock) rccTranslateReturn(translate, NULL, unlock)
+
+static int rccTranslateQueue(rcc_translate translate, const char *buf) {
+ size_t len, err;
+
+
+ len = strlen(buf);
+ translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s) + len - sizeof(rcc_external_command_s);
+
+ err = rccExternalWrite(translate->sock, (char*)&translate->wprefix, sizeof(rcc_translate_prefix_s) - 1, 0);
+ if (!err) err = rccExternalWrite(translate->sock, buf, len + 1, 0);
+ fsync(translate->sock);
+ return err?1:0;
}
char *rccTranslate(rcc_translate translate, const char *buf) {
@@ -69,27 +112,57 @@ char *rccTranslate(rcc_translate translate, const char *buf) {
rcc_external_command_s resp;
size_t err, len;
char *buffer;
-/*
size_t i;
-*/
-
+
if ((!translate)||(!buf)) return NULL;
-/*
- if (!strcmp(translate->prefix.to, "en")) {
- for (i=0;buf[i];i++)
+ if (!strcmp(translate->prefix.to, rcc_english_language_sn)) {
+ for (i=0;buf[i];i++) {
if ((unsigned char)buf[i]>0x7F) break;
+ if ((buf[i]>='A')&&(buf[i]<='Z')) break;
+ if ((buf[i]>='a')&&(buf[i]<='z')) break;
+ }
if (!buf[i]) return NULL;
}
-*/
+
+ rccMutexLock(translate->wmutex);
+
+ if (rccMutexTryLock(translate->mutex)) {
+ if ((translate->werror)||(translate->sock == -1)) return rccTranslateReturnNULL(translate,RCC_UNLOCK_W);
+
+ if (rccTranslateQueue(translate, buf)) translate->werror = 1;
+ return rccTranslateReturnNULL(translate, RCC_UNLOCK_W);
+ }
+
+ if (translate->werror) {
+ rccExternalClose(translate->sock);
+ translate->sock = -1;
+ translate->werror = 0;
+ }
if (translate->sock == -1) {
translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE);
- if (translate->sock == -1) return NULL;
+ if (translate->sock == -1) {
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
+ } else {
+ translate->werror = 0;
+ translate->remaining = 0;
+ }
} else if (translate->remaining) {
if (translate->remaining == (size_t)-1) {
err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), 0);
- if (err) return NULL;
+ if (err) {
+ if (err == sizeof(rcc_external_command_s)) {
+ if (rccTranslateQueue(translate, buf)) {
+ rccExternalClose(translate->sock);
+ translate->sock = -1;
+ }
+ } else {
+ rccExternalClose(translate->sock);
+ translate->sock = -1;
+ }
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
+ }
translate->remaining = resp.size;
}
@@ -97,13 +170,18 @@ char *rccTranslate(rcc_translate translate, const char *buf) {
if (!buffer) {
rccExternalClose(translate->sock);
translate->sock = -1;
- return NULL;
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
}
+
err = rccExternalRead(translate->sock, buffer, translate->remaining, 0);
free(buffer);
if (err) {
translate->remaining = err;
- return NULL;
+ if (rccTranslateQueue(translate, buf)) {
+ rccExternalClose(translate->sock);
+ translate->sock = -1;
+ }
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
}
translate->remaining = 0;
}
@@ -114,41 +192,50 @@ char *rccTranslate(rcc_translate translate, const char *buf) {
if (err) {
rccExternalClose(translate->sock);
translate->sock = -1;
- return NULL;
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
}
err = rccExternalWrite(translate->sock, buf, len + 1, 0);
if (err) {
rccExternalClose(translate->sock);
translate->sock = -1;
- return NULL;
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW);
}
+ rccMutexUnLock(translate->wmutex);
err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), translate->prefix.timeout);
if (err) {
if (err == sizeof(rcc_external_command_s)) {
translate->remaining = (size_t)-1;
} else {
+ rccMutexLock(translate->wmutex);
rccExternalClose(translate->sock);
translate->sock = -1;
+ rccMutexUnLock(translate->wmutex);
}
- return NULL;
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);
}
- if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) return NULL;
+
+ if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size))
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);
buffer = (char*)malloc(resp.size*sizeof(char));
if (!buffer) {
+ rccMutexLock(translate->wmutex);
rccExternalClose(translate->sock);
translate->sock = -1;
- return NULL;
+ rccMutexUnLock(translate->wmutex);
+
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);
}
+
err = rccExternalRead(translate->sock, buffer, resp.size, 0);
if (err) {
translate->remaining = err;
free(buffer);
- return NULL;
+ return rccTranslateReturnNULL(translate,RCC_UNLOCK_R);
}
- return buffer;
+ return rccTranslateReturn(translate, buffer, RCC_UNLOCK_R);
#else
return NULL;
#endif /* HAVE_LIBTRANSLATE */
diff --git a/src/rcctranslate.h b/src/rcctranslate.h
index 961af6f..b00cdfd 100644
--- a/src/rcctranslate.h
+++ b/src/rcctranslate.h
@@ -1,9 +1,10 @@
#ifndef _RCC_TRANSLATE_H
#define _RCC_TRANSLATE_H
+#include "rccmutex.h"
#include "rccexternal.h"
-#define RCC_TRANSLATE_DEFAULT_TIMEOUT 5000000 /* 5s */
#define RCC_EXTERNAL_COMMAND_TRANSLATE 0x80
+#define RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE 0x81
struct rcc_translate_prefix_t {
@@ -19,8 +20,13 @@ typedef struct rcc_translate_prefix_t *rcc_translate_prefix;
struct rcc_translate_t {
rcc_translate_prefix_s prefix;
+ rcc_translate_prefix_s wprefix;
size_t remaining;
+ rcc_mutex mutex;
+ rcc_mutex wmutex;
int sock;
+
+ unsigned char werror;
};
typedef struct rcc_translate_t rcc_translate_s;
diff --git a/src/recode.c b/src/recode.c
index 7e12343..d337164 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -15,21 +15,34 @@
#include "rccspell.h"
#define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a'))))
-#define RCC_REQUIRED_PROBABILITY 0.66
+#define RCC_PROBABILITY_STEP 0.10
+#define RCC_REQUIRED_PROBABILITY 0.33
+#define RCC_REQUIRED_LENGTH 5
+#define RCC_ACCEPTABLE_PROBABILITY 0
+#define RCC_ACCEPTABLE_LENGTH 3
rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) {
- rcc_speller speller;
+ rcc_speller speller = NULL, english_speller = NULL;
unsigned long i, nlanguages;
rcc_language_config config, config0 = NULL;
rcc_string recoded;
unsigned char *utf8;
size_t j, mode;
- unsigned long words, english, result;
+ unsigned long spres, words, english, result;
+ size_t longest;
unsigned char english_mode, english_word = 1;
+ char *english_string = NULL;
rcc_language_id english_lang = (rcc_language_id)-1;
+ size_t english_longest = 0;
+ unsigned char is_english_string = 1;
double res, english_res = 0;
rcc_option_value usedb4;
-
+ rcc_language_id bestlang = (rcc_language_id)-1;
+ unsigned long bestlongest = RCC_ACCEPTABLE_LENGTH;
+ double bestres = RCC_ACCEPTABLE_PROBABILITY;
+ char *best_string = NULL;
+
+ unsigned long accepted_nonenglish_langs = 0;
usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
@@ -50,6 +63,15 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id
nlanguages = ctx->n_languages;
+ english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn);
+ if (english_lang != (rcc_language_id)-1) {
+ config = rccGetUsableConfig(ctx, english_lang);
+ if (config) {
+ english_speller = rccConfigGetSpeller(config);
+ if (rccSpellerGetError(english_speller)) english_speller = NULL;
+ }
+ }
+
for (i=0;i<nlanguages;i++) {
config = rccGetUsableConfig(ctx, (rcc_language_id)i);
if (!config) continue;
@@ -68,11 +90,20 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id
else english_mode = 0;
utf8 = (char*)rccStringGetString(recoded);
- for (result=0,english=0,words=0,mode=0,j=0;utf8[j];j++) {
+ printf("%s\n", config->language->sn);
+
+ for (result=0,english=0,words=0,longest=0,mode=0,j=0;utf8[j];j++) {
if (isSpace(utf8[j])) {
if (mode) {
- if ((!english_mode)&&(english_word)) english++;
- result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0;
+ if ((!english_mode)&&(english_word)&&(rccSpellerSized(english_speller, utf8 + mode -1, j - mode + 1)))
+ english++;
+ else {
+ if ((english_mode)&&(!english_word)) is_english_string = 0;
+ spres = rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0;
+ printf("%.*s %s\n",j-mode+1,utf8+mode-1, spres?"<======":"");
+ if ((spres)&&((j - mode + 1)>longest)) longest = j - mode + 1;
+ result+=spres;
+ }
words++;
mode = 0;
} else continue;
@@ -85,40 +116,89 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id
}
}
}
+
if (mode) {
- result+=rccSpeller(speller, utf8 + mode - 1)?1:0;
+ if ((!english_mode)&&(english_word)&&(rccSpeller(english_speller, utf8 + mode -1)))
+ english++;
+ else {
+ if ((english_mode)&&(!english_word)) is_english_string = 0;
+ spres = rccSpeller(speller, utf8 + mode - 1)?1:0;
+ if ((spres)&&((j-mode+1)>longest)) longest = j - mode + 1;
+ result += spres;
+ }
words++;
}
if (english_mode) {
+ if (english_string) free(english_string);
+ printf("%u %u\n", result, words);
+
english_res = 1.*result/words;
- english_lang = (rcc_language_id)i;
- } else if (words) {
- res = 1.*result/words;
- if (res > RCC_REQUIRED_PROBABILITY) {
+ english_lang = (rcc_language_id)i;
+ english_longest = longest;
+ english_string = recoded;
+ } else if (words>english) {
+ res = 1.*result/(words - english);
+ printf("%u %u %u\n", result, words, english);
+ if ((res > RCC_REQUIRED_PROBABILITY)&&(longest > RCC_REQUIRED_LENGTH)) {
+ if (best_string) free(best_string);
+ if (english_string) free(english_string);
+
if (retstring) *retstring = recoded;
else free(recoded);
return (rcc_language_id)i;
- }
- if (words > english) {
- res = 1.*(result - english)/(words - english);
- if (res > RCC_REQUIRED_PROBABILITY) {
- if (retstring) *retstring = recoded;
- else free(recoded);
- return (rcc_language_id)i;
- }
- }
- }
-
- free(recoded);
+ } else if ((res > bestres + RCC_PROBABILITY_STEP)||
+ ((res > bestres - RCC_PROBABILITY_STEP)&&(longest > bestlongest))||
+ ((res > bestres)&&(longest == bestlongest))) {
+
+ if (best_string) free(best_string);
+
+ bestres = res;
+ bestlang = (rcc_language_id)i;
+ bestlongest = longest;
+ best_string = recoded;
+ } else if (!accepted_nonenglish_langs) {
+ bestlang = (rcc_language_id)i;
+ best_string = recoded;
+ } else free(recoded);
+
+ accepted_nonenglish_langs++;
+ } else free(recoded);
}
- if (english_res > RCC_REQUIRED_PROBABILITY) {
- if (retstring) {
- *retstring = rccCreateString(english_lang, buf, len);
- }
+ if ((is_english_string)&&(english_res > RCC_REQUIRED_PROBABILITY)&&(english_longest > RCC_REQUIRED_LENGTH)) {
+ if (best_string) free(best_string);
+ if (retstring) *retstring = english_string;
+ else if (english_string) free(english_string);
return english_lang;
}
+
+ if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) {
+ if (english_string) free(english_string);
+ if (retstring) *retstring = best_string;
+ else if (best_string) free(best_string);
+ return bestlang;
+ }
+
+ if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) {
+ if (best_string) free(best_string);
+ if (retstring) *retstring = english_string;
+ else if (english_string) free(english_string);
+ return english_lang;
+ }
+
+ if (best_string) {
+ if (english_string) free(english_string);
+ if (retstring) *retstring = best_string;
+ else if (best_string) free(best_string);
+ return bestlang;
+ } else if (best_string) free(best_string);
+
+ if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) {
+ if (retstring) *retstring = english_string;
+ else if (english_string) free(english_string);
+ return english_lang;
+ } else if (english_string) free(english_string);
return (rcc_language_id)-1;
}
@@ -206,9 +286,12 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,
*/
detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result);
- if (detected_language_id != (rcc_language_id)-1) return result;
+ if (detected_language_id != (rcc_language_id)-1) {
+ printf("Language %i: %s\n", rccStringGetLanguage(result), result);
+ return result;
+ }
+
-
err = rccConfigure(ctx);
if (err) return NULL;
@@ -316,7 +399,6 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s
}
if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) {
- puts("entrans");
if (!config->entrans) {
config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn);
}
@@ -384,7 +466,6 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const
const char *from_charset, *to_charset;
rcc_charset_id from_charset_id, to_charset_id;
rcc_class_type class_type;
- rcc_option_value usedb4;
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
@@ -394,20 +475,9 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const
class_type = rccGetClassType(ctx, to);
if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding;
- if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding;
-
- usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
- if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
- stmp = rccDb4GetKey(ctx->db4ctx, buf, len);
- if (stmp) {
- if (rccStringFixID(stmp, ctx)) free(stmp);
- else {
- result = rccSizedTo(ctx, to, stmp, rlen);
- free(stmp);
- return result;
- }
- }
- }
+ if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding;
+ if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding;
+ if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding;
err = rccConfigure(ctx);
if (err) return NULL;