其他
当你需要读取一个 47 MB 的 JSON 文件
https://juejin.cn/post/7253744712409071673
https://github.com/sunnyswag/emoji-search/releases/download/v1.0.0-beta/emoji_embeddings.gz
// embed 向量维度为:1536
{"emoji": "\ud83e\udd47", "message": "1st place medal", "embed": [-0.018469301983714104, -0.004823130089789629, ...]}
{"emoji": "\ud83e\udd48", "message": "2nd place medal", "embed": [-0.023217657580971718, -0.0019081177888438106, ...]}
hw.cpu.ncore 8
hw.device.name OPPO Reno3 Pro 5G
hw.ramSize 8G
image.androidVersion.api 33
// Opens the gzipped raw resource, decompresses it, and parses each text line
// as one JSON object with Gson. Both the resource stream and the reader are
// closed by their `use` blocks.
context.resources.openRawResource(R.raw.emoji_embeddings).use { inputStream ->
GZIPInputStream(inputStream).bufferedReader().use { bufferedReader ->
// readLines() collects every line into a List before the loop body runs,
// so the whole decompressed text is held in memory at once.
bufferedReader.readLines().forEachIndexed { index, line ->
val entity = gson.fromJson(line, EmojiJsonEntity::class.java)
// process entity
}
}
}
}
// Emits the decompressed resource one line at a time through a Flow, then
// parses each emitted line with Gson in the collector.
flow {
context.resources.openRawResource(R.raw.emoji_embeddings_json).use { inputStream ->
GZIPInputStream(inputStream).use { gzipInputStream ->
// useLines hands out the lines as a lazy Sequence and closes the reader
// when the lambda returns, so lines are emitted as they are read.
gzipInputStream.bufferedReader().useLines { lines ->
for (line in lines) {
emit(line)
}
}
}
}
// flowOn moves the upstream work (resource read + gunzip) to Dispatchers.IO;
// the collector below stays in the calling coroutine's context.
}.flowOn(Dispatchers.IO)
.collect {
val entity = gson.fromJson(it, EmojiJsonEntity::class.java)
// process entity
}
}
// Reads STREAM_SIZE gzipped resources concurrently: one line-emitting Flow is
// built per resource, the flows are merged, and each merged line is parsed
// with Gson in the collector.
// The mutex guards the "process entity" section below.
val mutex = Mutex()
List(STREAM_SIZE) { i ->
flow {
val resId = getEmbeddingResId(i) // look up the resource file id for the current index
context.resources.openRawResource(resId).use { inputStream ->
GZIPInputStream(inputStream).use { gzipInputStream ->
// Lines are read lazily and emitted one by one; the reader is closed by useLines.
gzipInputStream.bufferedReader().useLines { lines ->
for (line in lines) {
emit(line)
}
}
}
}
// Each per-resource flow does its reading and decompression on Dispatchers.IO.
}.flowOn(Dispatchers.IO)
}.asFlow()
// Collect up to STREAM_SIZE inner flows at the same time and interleave their lines.
.flattenMerge(STREAM_SIZE)
.collect { data ->
// JSON parsing happens outside the lock; only the processing step is locked.
val entity = gson.fromJson(data, EmojiJsonEntity::class.java)
mutex.withLock {
// process entity
}
}
}
// Obtains the DAO for the given context and iterates over everything that
// queryAll() returns.
val embeddingDao = getEmbeddingEntityDao(context)
// The safe call skips the loop entirely when queryAll() yields null.
embeddingDao.queryAll()?.forEachIndexed { index, emojiEmbeddingEntity ->
// process entity
}
// NOTE(review): the trailing Unit is presumably the result value of the
// enclosing block, whose header is not visible here — confirm before removing.
Unit
}
@TypeConverter
/**
 * Serializes [floatArray] into a byte array, four bytes per element.
 *
 * Bytes are written in ByteBuffer's default (big-endian) order, element by
 * element, so the result has exactly `floatArray.size * 4` bytes.
 *
 * @param floatArray the values to encode; may be empty.
 * @return the encoded bytes, empty when [floatArray] is empty.
 */
fun fromFloatArray(floatArray: FloatArray): ByteArray {
    val encoded = ByteArray(floatArray.size * Float.SIZE_BYTES)
    // A float view over the backing array copies all elements in one bulk put.
    ByteBuffer.wrap(encoded).asFloatBuffer().put(floatArray)
    return encoded
}
@TypeConverter
/**
 * Decodes [byteArray] into floats, consuming four bytes per element in
 * ByteBuffer's default (big-endian) order.
 *
 * Trailing bytes that do not form a complete 4-byte group are ignored.
 *
 * @param byteArray the encoded bytes; may be empty.
 * @return an array of `byteArray.size / 4` decoded floats.
 */
fun toFloatArray(byteArray: ByteArray): FloatArray {
    val decoded = FloatArray(byteArray.size / Float.SIZE_BYTES)
    // The float view exposes exactly the complete 4-byte groups; copy them in bulk.
    ByteBuffer.wrap(byteArray).asFloatBuffer().get(decoded)
    return decoded
}
}
syntax = "proto3";
// One emoji record together with its embedding vector.
message EmojiEmbedding {
// The emoji itself, as a string.
string emoji = 1;
// Text associated with the emoji — presumably the short description carried
// by the JSON "message" field; confirm against the data generator.
string message = 2;
// Components of the embedding vector, each a 32-bit float.
repeated float embed = 3;
}
// Streams length-delimited EmojiEmbedding messages out of the gzipped
// protobuf resource and hands each one to readEmojiData.
flow {
context.resources.openRawResource(R.raw.emoji_embeddings_proto).use { inputStream ->
GZIPInputStream(inputStream).buffered().use { gzipInputStream ->
// Keep parsing one delimited message at a time; a null result ends the loop.
while (true) {
EmojiEmbeddingOuterClass.EmojiEmbedding.parseDelimitedFrom(gzipInputStream)?.let {
emit(it)
} ?: break
}
}
}
// Reading, decompression and protobuf parsing run on Dispatchers.IO.
}.flowOn(Dispatchers.IO)
.buffer()
// NOTE(review): the lambda parameter is named byteArray, but the value it
// receives is the parsed EmojiEmbedding message emitted above.
.flatMapMerge { byteArray ->
flow { emit(readEmojiData(byteArray)) }
// The empty collector only drives the pipeline; results are not used here.
}.collect {}
}
/**
 * Handles one decoded [EmojiEmbeddingOuterClass.EmojiEmbedding] message.
 *
 * The body is a placeholder in this snippet: no processing is shown and the
 * function returns Unit.
 *
 * @param entity the parsed protobuf message to process.
 */
private fun readEmojiData(entity: EmojiEmbeddingOuterClass.EmojiEmbedding) {
// process entity
}