blob: 4c6e1aa77acee65673098c0972b4bffd7e9b1fad (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
#include <stdio.h>
#include <string.h>
#include <librcc.h>
/* Mask with only bit number i set. The argument is parenthesized so that
 * compound expressions such as bit(a | b) shift by the whole expression. */
#define bit(i) (1 << (i))
/*
* Latin Unicode subset:
* U+0100 - U+017E (within Latin Extended-A)
* U+0180 - U+024F (Latin Extended-B)
* U+1E00 - U+1EFF (Latin Extended Additional)
*/
/*
 * Guess whether a byte string is UTF-8 or a single-byte Latin charset.
 *
 * The buffer is walked as if it were UTF-8 while a score is kept:
 *   +1 for every continuation byte of a two-byte sequence whose lead byte is
 *      0xC2 or 0xC3 (code points U+0080..U+00FF);
 *   -1 for every byte that breaks UTF-8 framing (a missing continuation byte,
 *      an unexpected continuation byte, or a 0xFE/0xFF byte).
 * ASCII bytes are skipped and do not affect the score or the framing state.
 *
 * ctx  - engine context; not used by this detector.
 * sbuf - bytes to examine; NULL is treated like an empty string.
 * len  - number of bytes in sbuf, or 0 to use strlen(sbuf).
 *
 * Returns 0 when the score is positive and 1 otherwise.
 * NOTE(review): 0/1 are presumably indices into the engine's charset list
 * ("UTF-8", "ISO8859-1") -- confirm against librcc.
 */
static rcc_autocharset_id AutoengineWestern(rcc_engine_context ctx, const char *sbuf, int len) {
    const unsigned char *buf = (const unsigned char *)sbuf;
    long i;
    int j;
    /*
     * bytes > 0: continuation bytes still expected for the current sequence.
     * bytes < 0: recovering from a broken sequence; that many stray
     *            continuation bytes are absorbed without a penalty.
     */
    int bytes = 0;
    int rflag = 0;  /* non-zero while inside a sequence led by 0xC2/0xC3 */
    int res = 0;    /* running score, see the function comment */

    (void)ctx;

    /* No data means no UTF-8 evidence: same answer as for an empty string. */
    if (!sbuf) return (rcc_autocharset_id)1;
    if (!len) len = (int)strlen(sbuf);

    for (i = 0; i < len; i++) {
        if (buf[i] < 128) continue;

        if (bytes > 0) {
            if ((buf[i] & 0xC0) == 0x80) {
                /* Well-formed continuation byte. */
                if (rflag) res++;
                bytes--;
            } else {
                /* Sequence was cut short: penalize and enter recovery. */
                res--;
                bytes = 1 - bytes;
                rflag = 0;
            }
            continue;
        }

        /* Locate the highest clear bit below bit 7; j stays -1 for 0xFF. */
        for (j = 6; j >= 0; j--)
            if ((buf[i] & bit(j)) == 0) break;

        if ((j <= 0) || (j == 6)) {
            /*
             * j == 6: continuation byte with no sequence open; tolerated
             *         only while recovering (bytes < 0).
             * j <= 0: 0xFE or 0xFF, which can never start a sequence.
             */
            if ((j == 6) && (bytes < 0)) bytes++;
            else res--;
            continue;
        }

        /* Lead byte: 6 - j continuation bytes follow. */
        bytes = 6 - j;

        /*
         * Recompute the flag for every new sequence so that a previous
         * C2/C3 sequence cannot make unrelated sequences score points.
         */
        rflag = ((bytes == 1) && ((buf[i] == 0xC2) || (buf[i] == 0xC3)));
    }

    if (res > 0) return (rcc_autocharset_id)0;
    return (rcc_autocharset_id)1;
}
/*
 * Engine descriptor handed out by rccGetInfo(): the engine name, two members
 * left NULL, the detection callback and a NULL-terminated list of charset
 * names.
 * NOTE(review): the NULL members are presumably optional init/free hooks and
 * the callback's 0/1 results presumably index the charset list -- confirm
 * against librcc.h.
 */
static rcc_engine western_engine = {
    "Western",
    NULL,
    NULL,
    AutoengineWestern,
    { "UTF-8", "ISO8859-1", NULL }
};
/*
 * Return the Western engine descriptor.
 *
 * lang - language identifier; only tested for NULL, its contents are not
 *        examined here.
 *
 * Returns NULL when lang is NULL, otherwise a pointer to the file-static
 * western_engine object.
 */
rcc_engine *rccGetInfo(const char *lang) {
    return lang ? &western_engine : NULL;
}
|