From 60bd665e74cfeeaf70882173a0dd56c883e2014a Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@suren.me>
Date: Fri, 12 Mar 2021 03:55:34 +0100
Subject: Added to git tree

---
 fixes/libguess-fixes/README               |  1 +
 fixes/libguess-fixes/libguess-ds-cn.patch | 62 +++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 fixes/libguess-fixes/README
 create mode 100644 fixes/libguess-fixes/libguess-ds-cn.patch

(limited to 'fixes/libguess-fixes')

diff --git a/fixes/libguess-fixes/README b/fixes/libguess-fixes/README
new file mode 100644
index 0000000..bad7594
--- /dev/null
+++ b/fixes/libguess-fixes/README
@@ -0,0 +1 @@
+This patch adds BIG5 encoding detection to libguess's Chinese detection engine (the ORDER_CN set in guess.c), alongside UTF-8, GB2312 and GB18030.
diff --git a/fixes/libguess-fixes/libguess-ds-cn.patch b/fixes/libguess-fixes/libguess-ds-cn.patch
new file mode 100644
index 0000000..7c2384b
--- /dev/null
+++ b/fixes/libguess-fixes/libguess-ds-cn.patch
@@ -0,0 +1,62 @@
+diff -dPNur libguess-0.2.0-d7/guess.c libguess-0.2.0-d7-new/guess.c
+--- libguess-0.2.0-d7/guess.c	2006-12-05 17:59:32.000000000 +0100
++++ libguess-0.2.0-d7-new/guess.c	2007-06-26 19:56:59.000000000 +0200
+@@ -44,7 +44,7 @@
+ /* ORDER_** &highest, &second, ... &lowest */
+ #define ORDER_JP &utf8, &sjis, &eucj
+ #define ORDER_TW &utf8, &big5
+-#define ORDER_CN &utf8, &gb2312, &gb18030
++#define ORDER_CN &utf8, &gb2312, &gb18030, &big5
+ #define ORDER_KR &utf8, &euck, &johab
+ 
+ /* workaround for that glib's g_convert can't convert
+@@ -252,6 +252,8 @@
+     guess_dfa gb2312 = DFA_INIT(guess_gb2312_st, guess_gb2312_ar);
+     guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);
+     guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar);
++    guess_dfa big5 = DFA_INIT(guess_big5_st, guess_big5_ar);
++
+     guess_dfa *top = NULL;
+ 
+     guess_dfa *order[] = { ORDER_CN, NULL };
+@@ -287,22 +289,27 @@
+         }
+ 
+         if (DFA_ALIVE(gb2312)) {
+-            if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030))
++            if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5))
+                 return "GB2312";
+             DFA_NEXT(gb2312, c);
+         }
+         if (DFA_ALIVE(utf8)) {
+-            if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030))
++            if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5))
+                 return "UTF-8";
+             DFA_NEXT(utf8, c);
+         }
+         if (DFA_ALIVE(gb18030)) {
+-            if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312))
++            if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312) && !DFA_ALIVE(big5))
+                 return "GB18030";
+             DFA_NEXT(gb18030, c);
+         }
++        if (DFA_ALIVE(big5)) {
++            if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030))
+                return "BIG5";
++            DFA_NEXT(big5, c);
++        }
+ 
+-        if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) {
++        if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5)) {
+             /* we ran out the possibilities */
+             return NULL;
+         }
+@@ -323,6 +330,8 @@
+         return "UTF-8";
+     if (top == &gb18030)
+         return "GB18030";
++    if (top == &big5)
++        return "BIG5";
+     return NULL;
+ }
+ 
-- 
cgit v1.2.3