Regression fix

- ISO-8859-1 detection code is fixed
author: Suren A. Chilingaryan <csa@dside.dyndns.org> 2007-06-28 20:17:45 +0000
committer: Suren A. Chilingaryan <csa@dside.dyndns.org> 2007-06-28 20:17:45 +0000
commit: 6c1ee4cb430e77b9e7a4d9d8a027b58186cb05e7 (patch)
tree: 2cba917566df25e6a8af4022d86a79374fd2e287
parent: b68103e7018957e6fd25610da1d65deedd825497 (diff)
3 files changed, 24 insertions, 7 deletions
diff --git a/VERSION b/VERSION
index 1a03094..9767cc9 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.9
+0.1.10
diff --git a/src/librcd.c b/src/librcd.c
index 36986cc..b81d27f 100644
--- a/src/librcd.c
+++ b/src/librcd.c
@@ -259,29 +259,36 @@ with latin languages there is in every word besides umlauts should exist at
 least one standard latin character with code < 127. */
 static int check_latin(const unsigned char *buf, int len) {
     long i;
-    int word = 0;
+    int cyr = 0;
     int latin = 0;
     
     for (i=0;i<len;i++) {
 	if (buf[i]<128) {
 	    if (((buf[i]>='a')&&(buf[i]<='z'))||((buf[i]>='A')&&(buf[i]<='Z'))) {
-		    // Latin character inside a word, so it isn't cyrillic word
+		    // Latin character inside a word, so it probably isn't cyrillic word
 		latin++;
 	    } else {
 		    // Treating as a word separator.
-		if (word > 0) {
+		if (cyr > 0) {
 		    if (!latin) return 0;
-		    if ((word/latin)>4) return 0;
+		    if (cyr>latin) return 0;
 		}
 
-		word = 0;
+		cyr = 0;
 		latin = 0;
 	    }
 	} else {
 		// Could be cyrillic word
-	    if (word>=0) word++;
+	    cyr++;
 	}
     }
+    
+    if (cyr > 0) {
+	if (!latin) return 0;
+	if (cyr>latin) return 0;
+    }
+//    printf("C%u:L%u\n",cyr,latin);
+
     return 1;
 }
 
@@ -297,6 +304,15 @@ rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) {
     return is_win_charset2(buf,l);
 }
 
+/*
+rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) {
+    int res;
+    res = rcdGetRussianCharset1(buf, len);
+    printf("%u: %s\n", res, (buf&&!len)?buf:"null");
+    return res;
+}
+*/
+
 /* Compatibility */
 rcd_russian_charset get_russian_charset(const char *buf,int len) {
     return rcdGetRussianCharset(buf, len);
diff --git a/src/librcd.h b/src/librcd.h
index 6fc3281..918d8c0 100644
--- a/src/librcd.h
+++ b/src/librcd.h
@@ -29,6 +29,7 @@ rcdGetRussianCharset
 	1 - KOI8-R
 	2 - UTF8
 	3 - CP866
+	4 - ISO8859-1
 */
 
 rcd_russian_charset rcdGetRussianCharset(const char *buf, int len);
author	Suren A. Chilingaryan <csa@dside.dyndns.org>	2007-06-28 20:17:45 +0000
committer	Suren A. Chilingaryan <csa@dside.dyndns.org>	2007-06-28 20:17:45 +0000
commit	6c1ee4cb430e77b9e7a4d9d8a027b58186cb05e7 (patch)
tree	2cba917566df25e6a8af4022d86a79374fd2e287
parent	b68103e7018957e6fd25610da1d65deedd825497 (diff)