From 6c1ee4cb430e77b9e7a4d9d8a027b58186cb05e7 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Thu, 28 Jun 2007 20:17:45 +0000 Subject: Regression fix - ISO-8859-1 detection code is fixed --- VERSION | 2 +- src/librcd.c | 28 ++++++++++++++++++++++------ src/librcd.h | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/VERSION b/VERSION index 1a03094..9767cc9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.9 +0.1.10 diff --git a/src/librcd.c b/src/librcd.c index 36986cc..b81d27f 100644 --- a/src/librcd.c +++ b/src/librcd.c @@ -259,29 +259,36 @@ with latin languages there is in every word besides umlauts should exist at least one standard latin character with code < 127. */ static int check_latin(const unsigned char *buf, int len) { long i; - int word = 0; + int cyr = 0; int latin = 0; for (i=0;i<len;i++) { if (buf[i]<128) { if (((buf[i]>='a')&&(buf[i]<='z'))||((buf[i]>='A')&&(buf[i]<='Z'))) { - // Latin character inside a word, so it isn't cyrillic word + // Latin character inside a word, so it probably isn't cyrillic word latin++; } else { // Treating as a word separator. 
- if (word > 0) { + if (cyr > 0) { if (!latin) return 0; - if ((word/latin)>4) return 0; + if (cyr>latin) return 0; } - word = 0; + cyr = 0; latin = 0; } } else { // Could be cyrillic word - if (word>=0) word++; + cyr++; } } + + if (cyr > 0) { + if (!latin) return 0; + if (cyr>latin) return 0; + } +// printf("C%u:L%u\n",cyr,latin); + return 1; } @@ -297,6 +304,15 @@ rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { return is_win_charset2(buf,l); } +/* +rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { + int res; + res = rcdGetRussianCharset1(buf, len); + printf("%u: %s\n", res, (buf&&!len)?buf:"null"); + return res; +} +*/ + /* Compatibility */ rcd_russian_charset get_russian_charset(const char *buf,int len) { return rcdGetRussianCharset(buf, len); diff --git a/src/librcd.h b/src/librcd.h index 6fc3281..918d8c0 100644 --- a/src/librcd.h +++ b/src/librcd.h @@ -29,6 +29,7 @@ rcdGetRussianCharset 1 - KOI8-R 2 - UTF8 3 - CP866 + 4 - ISO8859-1 */ rcd_russian_charset rcdGetRussianCharset(const char *buf, int len); -- cgit v1.2.3