summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ilya Tumaykin <itumaykin@gmail.com> 2017-02-05 10:41:13 +0300
committer: Michael Palimaka <kensington@gentoo.org> 2017-02-11 22:24:29 +1100
commit: e770290a48c3590d7da7596f35daf758e101d227 (patch)
tree: df2dc99a76c90f58752d5218870e46a452cb7b62 /app-i18n
parent: media-libs/libsfml: Removed old. (diff)
download: gentoo-e770290a48c3590d7da7596f35daf758e101d227.tar.gz
gentoo-e770290a48c3590d7da7596f35daf758e101d227.tar.bz2
gentoo-e770290a48c3590d7da7596f35daf758e101d227.zip
app-i18n/uchardet: rename from dev-libs/uchardet
Rationale:
- provided functionality better fits app-i18n/ category;
- uchardet is similar to existing app-i18n/enca and app-i18n/libguess;
- uchardet also provides a CLI tool, not only a library.

Checklist:
1. `git mv dev-libs/uchardet app-i18n/uchardet` ✓
2. Add entry to profiles/updates/1Q-2017 ✓
3. Update reverse dependencies ✓
4. Update entries in profiles/ ✓
5. Update eclasses that reference the package (there's none) ✓
6. Update metadata.xml entries that reference the package ✓
7. Update news items that reference the package (there's none) ✓

Gentoo-Bug: https://bugs.gentoo.org/593176
Diffstat (limited to 'app-i18n')
-rw-r--r-- app-i18n/uchardet/Manifest | 2
-rw-r--r-- app-i18n/uchardet/files/uchardet-0.0.5-fix-ASCII-detection.patch | 116
-rw-r--r-- app-i18n/uchardet/files/uchardet-0.0.5-fix-return-code-on-error.patch | 19
-rw-r--r-- app-i18n/uchardet/files/uchardet-0.0.5-use-proper-package-name.patch | 30
-rw-r--r-- app-i18n/uchardet/metadata.xml | 15
-rw-r--r-- app-i18n/uchardet/uchardet-0.0.5-r1.ebuild | 34
-rw-r--r-- app-i18n/uchardet/uchardet-0.0.6.ebuild | 28
-rw-r--r-- app-i18n/uchardet/uchardet-9999.ebuild | 31
8 files changed, 275 insertions, 0 deletions
diff --git a/app-i18n/uchardet/Manifest b/app-i18n/uchardet/Manifest
new file mode 100644
index 000000000000..3cdddba6209d
--- /dev/null
+++ b/app-i18n/uchardet/Manifest
@@ -0,0 +1,2 @@
+DIST uchardet-0.0.5.tar.gz 222864 SHA256 7c5569c8ee1a129959347f5340655897e6a8f81ec3344de0012a243f868eabd1 SHA512 e32ff3e7baa9804199e3ca240ce590fed3fcb539fe4d780c4ec205fa5cbd45415e2c8c8db51d97965f9f9bbaad1f34613d5ed2849aafd9bbc3dda850c0be20ac WHIRLPOOL 737becbbf1be09e049207311c964ee61e78bce3c3cdc31cd5a071a52aef22b5f0d803a243aac8b0f9840c19d27ffbac3e08454ec7a74c2bb85f19f15333e3af6
+DIST uchardet-0.0.6.tar.xz 169192 SHA256 8351328cdfbcb2432e63938721dd781eb8c11ebc56e3a89d0f84576b96002c61 SHA512 eceeadae060bf277e298d709856609dde32921271140dc1fb0a33c7b6e1381033fc2960d616ebbd82c92815936864d2c0743b1b5ea1b7d4a200df87df80d6de5 WHIRLPOOL 3fa915fa768be9cb4002e0a1b84c120db017f59fd0011df36a4853c53b403d5f3839647ab7aff8d8691a43ef0ecc90714475ef6a46a85d20abbd57fba7d90a13
diff --git a/app-i18n/uchardet/files/uchardet-0.0.5-fix-ASCII-detection.patch b/app-i18n/uchardet/files/uchardet-0.0.5-fix-ASCII-detection.patch
new file mode 100644
index 000000000000..c82aee866ebc
--- /dev/null
+++ b/app-i18n/uchardet/files/uchardet-0.0.5-fix-ASCII-detection.patch
@@ -0,0 +1,116 @@
+commit 4c8316f9cfda38d75fb015c0eb40e0eebb03d28f
+Author: Jehan <jehan@girinstud.io>
+Date: Sat Dec 5 21:04:20 2015 +0100
+
+ Nearly-ASCII text with NBSP is still not ASCII.
+
+ There is no "exception" in encoding. The non-breaking space 0xA0 is not
+ ASCII, and therefore returning "ASCII" will later create issues (for
+ instance trying to re-encode with iconv produces an error).
+ This was obviously an explicit decision in original code (according to
+ code comments), probably tied to specificity of the original program from
+ Mozilla. Now we want strict detection.
+ I will return "ISO-8859-1" for "nearly-ASCII texts with NBSP as only
+ exception" (note that I could have returned any ISO-8859 charsets since
+ they all have this character in common).
+
+diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp
+index ab8bae0..ff06b9d 100644
+--- a/src/nsUniversalDetector.cpp
++++ b/src/nsUniversalDetector.cpp
+@@ -47,6 +47,7 @@
+
+ nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
+ {
++ mNbspFound = PR_FALSE;
+ mDone = PR_FALSE;
+ mBestGuess = -1; //illegal value as signal
+ mInTag = PR_FALSE;
+@@ -75,6 +76,7 @@ nsUniversalDetector::~nsUniversalDetector()
+ void
+ nsUniversalDetector::Reset()
+ {
++ mNbspFound = PR_FALSE;
+ mDone = PR_FALSE;
+ mBestGuess = -1; //illegal value as signal
+ mInTag = PR_FALSE;
+@@ -162,9 +164,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
+ PRUint32 i;
+ for (i = 0; i < aLen; i++)
+ {
+- /* Other than 0xA0, if every other character is ASCII, the page is ASCII.
++ /* If every other character is ASCII or 0xA0, we don't run charset
++ * probers.
+ * 0xA0 (NBSP in a few charset) is apparently a rare exception
+- * of non-ASCII character contained in ASCII text. */
++ * of non-ASCII character often contained in nearly-ASCII text. */
+ if (aBuf[i] & '\x80' && aBuf[i] != '\xA0')
+ {
+ /* We got a non-ASCII byte (high-byte) */
+@@ -203,11 +206,19 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
+ }
+ else
+ {
+- //ok, just pure ascii so far
+- if ( ePureAscii == mInputState &&
+- (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
++ /* Just pure ASCII or NBSP so far. */
++ if (aBuf[i] == '\xA0')
+ {
+- //found escape character or HZ "~{"
++ /* ASCII with the only exception of NBSP seems quite common.
++ * I doubt it is really necessary to train a model here, so let's
++ * just make an exception.
++ */
++ mNbspFound = PR_TRUE;
++ }
++ else if (mInputState == ePureAscii &&
++ (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')))
++ {
++ /* We found an escape character or HZ "~{". */
+ mInputState = eEscAscii;
+ }
+ mLastChar = aBuf[i];
+@@ -229,6 +240,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
+ mDone = PR_TRUE;
+ mDetectedCharset = mEscCharSetProber->GetCharSetName();
+ }
++ else if (mNbspFound)
++ {
++ mDetectedCharset = "ISO-8859-1";
++ }
+ else
+ {
+ /* ASCII with the ESC character (or the sequence "~{") is still
+@@ -253,8 +268,17 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
+ break;
+
+ default:
+- /* Pure ASCII */
+- mDetectedCharset = "ASCII";
++ if (mNbspFound)
++ {
++ /* ISO-8859-1 is a good result candidate for ASCII + NBSP.
++ * (though it could have been any ISO-8859 encoding). */
++ mDetectedCharset = "ISO-8859-1";
++ }
++ else
++ {
++ /* Pure ASCII */
++ mDetectedCharset = "ASCII";
++ }
+ break;
+ }
+ return NS_OK;
+diff --git a/src/nsUniversalDetector.h b/src/nsUniversalDetector.h
+index 4d9b460..9f0a4b1 100644
+--- a/src/nsUniversalDetector.h
++++ b/src/nsUniversalDetector.h
+@@ -72,6 +72,7 @@ protected:
+ virtual void Report(const char* aCharset) = 0;
+ virtual void Reset();
+ nsInputState mInputState;
++ PRBool mNbspFound;
+ PRBool mDone;
+ PRBool mInTag;
+ PRBool mStart;
diff --git a/app-i18n/uchardet/files/uchardet-0.0.5-fix-return-code-on-error.patch b/app-i18n/uchardet/files/uchardet-0.0.5-fix-return-code-on-error.patch
new file mode 100644
index 000000000000..3b943afea15d
--- /dev/null
+++ b/app-i18n/uchardet/files/uchardet-0.0.5-fix-return-code-on-error.patch
@@ -0,0 +1,19 @@
+commit 248d6dbd351c22989090d318128cb38b11a89f98
+Author: Jehan <jehan@girinstud.io>
+Date: Thu Jan 21 18:16:42 2016 +0100
+
+ tools: exit with non-zero value on uchardet error.
+
+diff --git a/src/tools/uchardet.cpp b/src/tools/uchardet.cpp
+index 91912a0..bcfa234 100644
+--- a/src/tools/uchardet.cpp
++++ b/src/tools/uchardet.cpp
+@@ -60,7 +60,7 @@ void detect(FILE * fp)
+ if (retval != 0)
+ {
+ fprintf(stderr, "Handle data error.\n");
+- exit(0);
++ exit(1);
+ }
+ }
+ uchardet_data_end(handle);
diff --git a/app-i18n/uchardet/files/uchardet-0.0.5-use-proper-package-name.patch b/app-i18n/uchardet/files/uchardet-0.0.5-use-proper-package-name.patch
new file mode 100644
index 000000000000..b1ed88991cf4
--- /dev/null
+++ b/app-i18n/uchardet/files/uchardet-0.0.5-use-proper-package-name.patch
@@ -0,0 +1,30 @@
+commit b6d872bbec3be7abfccbdfd3d90e784cf7281c55
+Author: Jehan <jehan@girinstud.io>
+Date: Tue Dec 15 21:40:16 2015 +0100
+
+ app: package name wrong in CMakeLists.txt.
+
+ Probably coming from a copy-paste error when the build system was
+ originally created.
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 0b65c49..4f279e1 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -1,6 +1,6 @@
+ ######## Project settings
+ cmake_minimum_required(VERSION 2.8)
+-set (PACKAGE_NAME opencc)
++set (PACKAGE_NAME uchardet)
+ project (${PACKAGE_NAME} CXX C)
+ enable_testing()
+
+@@ -54,7 +54,7 @@ if (DEFINED SYSCONF_INSTALL_DIR)
+ set (DIR_ETC ${SYSCONF_INSTALL_DIR})
+ endif (DEFINED SYSCONF_INSTALL_DIR)
+
+-set (DIR_SHARE_UCHARDET ${DIR_SHARE}/opencc)
++set (DIR_SHARE_UCHARDET ${DIR_SHARE}/uchardet)
+ set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale)
+
+ ######## Configuration
diff --git a/app-i18n/uchardet/metadata.xml b/app-i18n/uchardet/metadata.xml
new file mode 100644
index 000000000000..fea82382a727
--- /dev/null
+++ b/app-i18n/uchardet/metadata.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <maintainer type="person">
+ <email>itumaykin+gentoo@gmail.com</email>
+ <name>Coacher</name>
+ </maintainer>
+ <maintainer type="project">
+ <email>proxy-maint@gentoo.org</email>
+ <name>Proxy Maintainers</name>
+ </maintainer>
+ <upstream>
+ <bugs-to>https://bugs.freedesktop.org/enter_bug.cgi?product=uchardet</bugs-to>
+ </upstream>
+</pkgmetadata>
diff --git a/app-i18n/uchardet/uchardet-0.0.5-r1.ebuild b/app-i18n/uchardet/uchardet-0.0.5-r1.ebuild
new file mode 100644
index 000000000000..89f103fdf396
--- /dev/null
+++ b/app-i18n/uchardet/uchardet-0.0.5-r1.ebuild
@@ -0,0 +1,34 @@
+# Copyright 1999-2016 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+
+inherit cmake-utils
+
+DESCRIPTION="An encoding detector library"
+HOMEPAGE="https://github.com/BYVoid/uchardet https://www.freedesktop.org/wiki/Software/uchardet/"
+SRC_URI="https://github.com/BYVoid/${PN}/archive/v${PV}.tar.gz -> ${P}.tar.gz"
+
+LICENSE="|| ( MPL-1.1 GPL-2+ LGPL-2.1+ )"
+SLOT="0"
+KEYWORDS="alpha amd64 ~arm hppa ppc64 x86"
+IUSE="static-libs test"
+
+PATCHES=(
+ "${FILESDIR}/${P}-fix-ASCII-detection.patch"
+ "${FILESDIR}/${P}-use-proper-package-name.patch"
+ "${FILESDIR}/${P}-fix-return-code-on-error.patch"
+)
+
+src_prepare() {
+ use test || cmake_comment_add_subdirectory test
+ cmake-utils_src_prepare
+}
+
+src_configure() {
+ local mycmakeargs=(
+ $(cmake-utils_use_build static-libs STATIC)
+ )
+ cmake-utils_src_configure
+}
diff --git a/app-i18n/uchardet/uchardet-0.0.6.ebuild b/app-i18n/uchardet/uchardet-0.0.6.ebuild
new file mode 100644
index 000000000000..ed98632dfff8
--- /dev/null
+++ b/app-i18n/uchardet/uchardet-0.0.6.ebuild
@@ -0,0 +1,28 @@
+# Copyright 1999-2016 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=6
+
+inherit cmake-utils
+
+DESCRIPTION="An encoding detector library"
+HOMEPAGE="https://www.freedesktop.org/wiki/Software/uchardet/"
+SRC_URI="https://www.freedesktop.org/software/${PN}/releases/${P}.tar.xz"
+
+LICENSE="|| ( MPL-1.1 GPL-2+ LGPL-2.1+ )"
+SLOT="0"
+KEYWORDS="~alpha ~amd64 ~arm ~hppa ~ppc64 ~x86"
+IUSE="static-libs test"
+
+src_prepare() {
+ cmake-utils_src_prepare
+ use test || cmake_comment_add_subdirectory test
+}
+
+src_configure() {
+ local mycmakeargs=(
+ -DBUILD_STATIC=$(usex static-libs)
+ )
+ cmake-utils_src_configure
+}
diff --git a/app-i18n/uchardet/uchardet-9999.ebuild b/app-i18n/uchardet/uchardet-9999.ebuild
new file mode 100644
index 000000000000..2d53e33d7c36
--- /dev/null
+++ b/app-i18n/uchardet/uchardet-9999.ebuild
@@ -0,0 +1,31 @@
+# Copyright 1999-2016 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=6
+
+inherit cmake-utils git-r3
+
+DESCRIPTION="An encoding detector library"
+HOMEPAGE="https://www.freedesktop.org/wiki/Software/uchardet/"
+EGIT_REPO_URI=(
+ https://anongit.freedesktop.org/git/${PN}/${PN}.git
+ git://anongit.freedesktop.org/${PN}/${PN}
+)
+
+LICENSE="|| ( MPL-1.1 GPL-2+ LGPL-2.1+ )"
+SLOT="0"
+KEYWORDS=""
+IUSE="static-libs test"
+
+src_prepare() {
+ cmake-utils_src_prepare
+ use test || cmake_comment_add_subdirectory test
+}
+
+src_configure() {
+ local mycmakeargs=(
+ -DBUILD_STATIC=$(usex static-libs)
+ )
+ cmake-utils_src_configure
+}