-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Expand file tree
/
Copy pathEncodingDetect.kt
More file actions
87 lines (79 loc) · 2.74 KB
/
EncodingDetect.kt
File metadata and controls
87 lines (79 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package io.legado.app.utils
import android.text.TextUtils
import io.legado.app.lib.icu4j.CharsetDetector
import org.jsoup.Jsoup
import java.io.File
import java.io.FileInputStream
/**
 * Automatically detects the character encoding of files and byte buffers.
 */
@Suppress("MemberVisibilityCanBePrivate", "unused")
object EncodingDetect {
// Case-insensitive, non-greedy match of a whole <head>...</head> element.
// getHtmlEncode falls back to it when the raw byte search finds nothing.
private val headTagRegex = "(?i)<head>[\\s\\S]*?</head>".toRegex()
// Encoded forms of the literal opening/closing tags; getHtmlEncode looks for
// them in the raw input before decoding anything to a String.
private val headOpenBytes = "<head>".toByteArray()
private val headCloseBytes = "</head>".toByteArray()
/**
 * Returns the charset declared inside an HTML document's `<head>`, or a detected one.
 *
 * The `<meta>` tags of the head are scanned in document order. A non-empty `charset`
 * attribute wins; otherwise the charset named in the `content` attribute of an
 * `http-equiv="content-type"` tag is used.
 *
 * @param bytes raw bytes of the HTML document
 * @return the declared charset name with surrounding whitespace and quotes removed;
 * when nothing usable is declared, or parsing fails, the result of [getEncode] on [bytes]
 */
fun getHtmlEncode(bytes: ByteArray): String {
    try {
        // Decode the whole input only when the cheap byte search finds no head element.
        val headHtml = extractHeadHtml(bytes) ?: headTagRegex.find(String(bytes))?.value
        if (headHtml != null) {
            for (metaTag in Jsoup.parseBodyFragment(headHtml).getElementsByTag("meta")) {
                val declared = metaTag.attr("charset").trim()
                if (!TextUtils.isEmpty(declared)) {
                    return declared
                }
                if (metaTag.attr("http-equiv").equals("content-type", true)) {
                    val fromContent = charsetFromContentType(metaTag.attr("content"))
                    if (!TextUtils.isEmpty(fromContent)) {
                        return fromContent
                    }
                }
            }
        }
    } catch (ignored: Exception) {
        // Best effort: a malformed head must not prevent the detection fallback below.
    }
    return getEncode(bytes)
}

/**
 * Cuts the `<head>...</head>` element out of the raw bytes and decodes just that slice.
 *
 * Returns null when either tag is not found.
 * NOTE(review): relies on the project's ByteArray.indexOf(ByteArray[, fromIndex]) extension,
 * presumably returning -1 when there is no match - confirm against its definition.
 */
private fun extractHeadHtml(bytes: ByteArray): String? {
    val startIndex = bytes.indexOf(headOpenBytes)
    if (startIndex < 0) return null
    val endIndex = bytes.indexOf(headCloseBytes, startIndex)
    if (endIndex < 0) return null
    return String(bytes.copyOfRange(startIndex, endIndex + headCloseBytes.size))
}

/**
 * Extracts the charset from a `Content-Type` style value such as `text/html; charset=utf-8`.
 *
 * Returns an empty string when the value names no charset (for example plain `text/html`).
 */
private fun charsetFromContentType(content: String): String {
    val marker = "charset="
    val idx = content.indexOf(marker, ignoreCase = true)
    val raw = if (idx > -1) {
        // Drop any parameters that follow the charset, e.g. "charset=utf-8; foo=bar".
        content.substring(idx + marker.length).substringBefore(';')
    } else {
        // Lenient form "text/html; utf-8". The explicit default matters: without it a value
        // containing no ';' would be returned whole and "text/html" reported as the charset.
        content.substringAfter(";", "")
    }
    return raw.trim().trim('"', '\'')
}
/**
 * Detects the charset of [bytes] with [CharsetDetector].
 *
 * @param bytes the data to inspect
 * @return the name of the detector's match, or "UTF-8" when the detector returns no match
 */
fun getEncode(bytes: ByteArray): String =
    CharsetDetector().setText(bytes).detect()?.name ?: "UTF-8"
/**
 * Detects the encoding of the file at the given path.
 *
 * @param filePath path of the file to inspect
 * @return the charset name produced by the [File] overload of this function
 */
fun getEncode(filePath: String): String = getEncode(File(filePath))
/**
 * Detects the encoding of a file.
 *
 * Detection runs on the bytes that getFileBytes returns for the file, not on the
 * whole file.
 *
 * @param file the file to inspect
 * @return the charset name produced by the byte-array overload of this function
 */
fun getEncode(file: File): String = getEncode(getFileBytes(file))
/**
 * Reads at most the first 8000 bytes of [file].
 *
 * Only the bytes actually read are returned, so a short file is not padded with
 * trailing zero bytes that would distort charset detection.
 *
 * @param file the file to sample
 * @return the leading bytes of the file (fewer than 8000 for a short file); an empty
 * array when nothing could be read. Read errors are printed to stderr, not thrown.
 */
private fun getFileBytes(file: File?): ByteArray {
    val buffer = ByteArray(8000)
    var filled = 0
    try {
        FileInputStream(file).use { input ->
            // A single read() may deliver fewer bytes than requested; keep reading
            // until the buffer is full or the stream reports end of file.
            while (filled < buffer.size) {
                val count = input.read(buffer, filled, buffer.size - filled)
                if (count < 0) break
                filled += count
            }
        }
    } catch (e: Exception) {
        System.err.println("Error: $e")
    }
    return if (filled == buffer.size) buffer else buffer.copyOf(filled)
}
}