diff options
author | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-06-16 23:19:27 +0000 |
---|---|---|
committer | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-06-16 23:19:27 +0000 |
commit | 70fbe7822024d0acc68df3607ff25bf8d7a71751 (patch) | |
tree | 553cd2ef8cfc936fc890113596db2c4478fe5163 /statgen/test.c | |
download | librcd-70fbe7822024d0acc68df3607ff25bf8d7a71751.tar.gz librcd-70fbe7822024d0acc68df3607ff25bf8d7a71751.tar.bz2 librcd-70fbe7822024d0acc68df3607ff25bf8d7a71751.tar.xz librcd-70fbe7822024d0acc68df3607ff25bf8d7a71751.zip |
initial import
(automatically generated log message)
Diffstat (limited to 'statgen/test.c')
-rw-r--r-- | statgen/test.c | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/statgen/test.c b/statgen/test.c new file mode 100644 index 0000000..936b491 --- /dev/null +++ b/statgen/test.c @@ -0,0 +1,84 @@ +#include <stdio.h> +#include "charset_auto_russian.h" + +main(int argc, char *argv[]) { + FILE *f; + int len,st; + char word[256],phrase[8192]; + unsigned long a[4]={0,0,0,0}; + int i,max,mw; + + + if ((argc!=2)&&(argc!=3)) { + printf("Usage: %s <file name> [<max words>]\n",argv[0]); + exit(0); + } + + if (argc==3) mw=atoi(argv[2]); + else mw=1; + + f=fopen(argv[1],"r"); + if (!f) { + printf("Failed to open specified file. Check permissions!\n"); + exit(1); + } + + while(!feof(f)) { + strcpy(phrase,""); + for (i=0;i<mw;i++) { + if (i) strcat(phrase," "); + fscanf(f,"%s",&word); + for(st=0;word[st]=='"'||word[st]=='\''||word[st]=='(';st++); + if (strlen(word)<1) continue; + for(len=strlen(word)-1;word[len]==','||word[len]=='.'||word[len]=='!'||word[len]=='?'||word[len]==';'||word[len]=='-'||word[len]==':'||word[len]=='"'||word[len]=='\''||word[len]==')';len--); + if (strlen(word)<1) continue; + else word[len+1]=0; + strcat(phrase,word+st); + } + if (strlen(phrase)<5) continue; + + a[autocharset_russian_uc(phrase,strlen(phrase))]++; +// a[autocharset_russian(phrase,strlen(phrase))]++; + +// a[autocharset_russian(word+st,len+1-st)]++; +// puts(word); + } + + printf("Win: %lu, Koi: %lu, Alt: %lu, UTF: %lu\n",a[0],a[1],a[3],a[2]); + fclose(f); + if (a[0]>a[1]) { + if (a[0]>a[2]) max=0; + else max=2; + } else { + if (a[1]>a[2]) max=1; + else max=2; + } + if (a[3]>max) max=3; + + f=fopen(argv[1],"r"); + while(!feof(f)) { + strcpy(phrase,""); + for (i=0;i<mw;i++) { + if (i) strcat(phrase," "); + fscanf(f,"%s",&word); + for(st=0;word[st]=='"'||word[st]=='\''||word[st]=='(';st++); + if (strlen(word)<1) continue; + for(len=strlen(word)-1;word[len]==','||word[len]=='.'||word[len]=='!'||word[len]=='?'||word[len]==';'||word[len]=='-'||word[len]==':'||word[len]=='"'||word[len]=='\''||word[len]==')';len--); + if (strlen(word)<1) continue; + else word[len+1]=0; + strcat(phrase,word+st); + } + if (strlen(phrase)<5) continue; + + i=autocharset_russian_uc(phrase,strlen(phrase)); +// i=autocharset_russian(phrase,strlen(phrase)); +// i=autocharset_russian(word+st,len+1-st); + if (i!=max) { + if (i==0) printf("Win: %s\n",phrase); + else if (i==1) printf("Koi: %s\n",phrase); + else if (i==2) printf("UTF: %s\n",phrase); + else if (i==3) printf("ALT: %s\n",phrase); + } + } + fclose(f); +} |