其他
Dex起步探索
看雪论坛作者ID:misskings
部分内容摘自:android软件安全权威指南:丰生强
1
dex文件
在把 Java 编译得到的多个 .class 文件转换为 dex 文件的过程中,工具会把所有 class 文件中重复出现的冗余信息提取出来,合并成共享的常量池。例如多个 class 文件中都存在字符串 "hello world",转换后它只会单独存放一份,由所有类共享;方法的签名同样会被放进常量池。我们将编译好的 apk 文件解压后,就能拿到 classes.dex 文件。
dex文件格式
1、DexFile结构
/* Collection of pointers to the sections of a loaded DEX file. */
struct DexFile {
/* Header of the optimized (odex) container. */
const DexOptHeader* pOptHeader;
/* DEX file header: describes the file and records the file offsets of the other sections. */
const DexHeader* pHeader;
/* Index (ID) sections. */
const DexStringId* pStringIds;
const DexTypeId* pTypeIds;
const DexFieldId* pFieldIds;
const DexMethodId* pMethodIds;
const DexProtoId* pProtoIds;
/* Class definitions, from which the actual class data is reached. */
const DexClassDef* pClassDefs;
/* Static link data section. */
const DexLink* pLinkData;
/*
* These are mapped out of the "auxiliary" section, and may not be
* included in the file.
*/
const DexClassLookup* pClassLookup;
const void* pRegisterMapPool; // RegisterMapClassPool
/* points to start of DEX file data */
const u1* baseAddr;
/* track memory overhead for auxiliary structures */
int overhead;
/* additional app-specific data structures associated with the DEX */
//void* auxData;
};
2、dex_header
/* Header at the start of a DEX file; gives the size and file offset of every section. */
struct DexHeader {
u1 magic[8]; /* Marks a valid DEX file; usually the fixed bytes 64 65 78 0A 30 33 35 00 ("dex\n035\0") */
u4 checksum; /* adler32 checksum of the DEX file, used to detect corruption or tampering */
u1 signature[kSHA1DigestLen]; /* SHA-1 hash, used to identify the DEX file before dexopt optimization */
u4 fileSize; /* length of entire file, including this header */
u4 headerSize; /* offset to start of next section, i.e. the size of this header; normally 0x70 */
u4 endianTag; /* Byte order of the file; the default ENDIAN_CONSTANT (0x12345678) means little-endian */
u4 linkSize; /* size of the link section */
u4 linkOff; /* file offset of the link section */
u4 mapOff; /* file offset of the DexMapList structure */
u4 stringIdsSize; /* everything below is a section size followed by its file offset */
u4 stringIdsOff;
u4 typeIdsSize;
u4 typeIdsOff;
u4 protoIdsSize;
u4 protoIdsOff;
u4 fieldIdsSize;
u4 fieldIdsOff;
u4 methodIdsSize;
u4 methodIdsOff;
u4 classDefsSize;
u4 classDefsOff;
u4 dataSize;
u4 dataOff;
};
另外留意,DexHeader 结构体的大小固定为 0x70 字节,因此 headerSize 字段在文件中按小端序存储为 70 00 00 00。有的脱壳工具会把这 4 个字节作为特征,在内存中搜索 dex 来进行脱壳(比如 FRIDA-DEXDump 的深度检索)。
3、string_ids
/* One entry of the string_ids section. */
struct DexStringId {
u4 stringDataOff; /* file offset of the string data */
};
4、type_ids
/* One entry of the type_ids section. */
struct DexTypeId {
u4 descriptorIdx; /* index into stringIds list for type descriptor */
};
5、proto_ids
/* One element of a type list. Declared first because DexTypeList embeds it. */
struct DexTypeItem {
u2 typeIdx; /* index into the DexTypeId list */
};
/* Counted list of type items; `list` is the first of `size` consecutive entries. */
struct DexTypeList {
u4 size; /* number of DexTypeItem entries */
DexTypeItem list[1]; /* entries */
};
/* One entry of the proto_ids section: a method prototype (signature). */
struct DexProtoId {
u4 shortyIdx; /* index into the DexStringId list: signature string built from the return type and parameter types */
u4 returnTypeIdx; /* index into the DexTypeId list: the return type */
u4 parametersOff; /* file offset of a DexTypeList holding the parameter types */
};
6、field_ids
/* One entry of the field_ids section. */
struct DexFieldId {
u2 classIdx; /* index into the DexTypeId list: the class that owns the field */
u2 typeIdx; /* index into the DexTypeId list: the type of the field */
u4 nameIdx; /* index into the DexStringId list: the name of the field */
};
7、method_ids
/* One entry of the method_ids section. */
struct DexMethodId {
u2 classIdx; /* index into the DexTypeId list: the class that owns the method */
u2 protoIdx; /* index into the DexProtoId list: the signature of the method */
u4 nameIdx; /* index into the DexStringId list: the name of the method */
};
8、class_def
/* One entry of the class_defs section. */
struct DexClassDef {
u4 classIdx; /* index into the DexTypeId list: the type of this class */
u4 accessFlags; /* access flags */
u4 superclassIdx; /* index into the DexTypeId list: the type of the superclass */
u4 interfacesOff; /* file offset of a DexTypeList of interfaces; 0 when the class declares/implements none */
u4 sourceFileIdx; /* index into the DexStringId list: name of the source file containing the class */
u4 annotationsOff; /* file offset of a DexAnnotationsDirectoryItem (class, field, method and parameter annotations); 0 when there are no annotations */
u4 classDataOff; /* file offset of the DexClassData structure holding the class data */
u4 staticValuesOff; /* file offset of the DexEncodedArray structure holding the static values of the class */
};
下面同样展示一组真实数据:
/* expanded form of a class_data_item header: the four member counts */
struct DexClassDataHeader {
u4 staticFieldsSize; /* number of static fields */
u4 instanceFieldsSize; /* number of instance fields */
u4 directMethodsSize; /* number of direct methods */
u4 virtualMethodsSize; /* number of virtual methods */
};
/* expanded form of encoded_field */
struct DexField {
u4 fieldIdx; /* index into the DexFieldId list */
u4 accessFlags; /* access flags */
};
/* expanded form of encoded_method */
struct DexMethod {
u4 methodIdx; /* index into the DexMethodId list */
u4 accessFlags; /* access flags */
u4 codeOff; /* file offset of the DexCode structure */
};
/* Expanded class data: the member counts plus the four member arrays. */
struct DexClassData {
DexClassDataHeader header; /* number of fields and methods in each category */
DexField* staticFields; /* static fields */
DexField* instanceFields; /* instance fields */
DexMethod* directMethods; /* direct methods */
DexMethod* virtualMethods; /* virtual methods */
};
/* Code item of one method: register counts, the instruction stream and optional try/catch data. */
struct DexCode {
u2 registersSize; /* number of registers used */
u2 insSize; /* number of words (registers) used by incoming arguments */
u2 outsSize; /* number of registers used when calling other methods */
u2 triesSize; /* number of try/catch items */
u4 debugInfoOff; /* file offset of the debug info */
u4 insnsSize; /* size of the instruction stream, in 2-byte units */
u2 insns[1]; /* instruction stream */
/* 2 bytes of padding for alignment */
/* followed by try_item[triesSize] (DexTry structures) */
/* followed by uleb128 handlersSize */
/* followed by catch_handler_item[handlersSize] (DexCatchHandler structures) */
};
9、map_list
/* One entry of the map list: describes one section of the file. */
struct DexMapItem {
u2 type; /* one of the kDexType* type codes */
u2 unused; /* unused; present for alignment */
u4 size; /* size of the data (swapMap treats it as an item count for data-section types) */
u4 offset; /* file offset of the data of this type */
};
/*
* Direct-mapped "map_list": a count followed by that many DexMapItem entries.
*/
struct DexMapList {
u4 size; /* number of DexMapItem entries */
DexMapItem list[1]; /* entries */
};
/* Type codes stored in DexMapItem.type. */
enum {
kDexTypeHeaderItem = 0x0000,
kDexTypeStringIdItem = 0x0001,
kDexTypeTypeIdItem = 0x0002,
kDexTypeProtoIdItem = 0x0003,
kDexTypeFieldIdItem = 0x0004,
kDexTypeMethodIdItem = 0x0005,
kDexTypeClassDefItem = 0x0006,
kDexTypeCallSiteIdItem = 0x0007,
kDexTypeMethodHandleItem = 0x0008,
kDexTypeMapList = 0x1000,
kDexTypeTypeList = 0x1001,
kDexTypeAnnotationSetRefList = 0x1002,
kDexTypeAnnotationSetItem = 0x1003,
kDexTypeClassDataItem = 0x2000,
kDexTypeCodeItem = 0x2001,
kDexTypeStringDataItem = 0x2002,
kDexTypeDebugInfoItem = 0x2003,
kDexTypeAnnotationItem = 0x2004,
kDexTypeEncodedArrayItem = 0x2005,
kDexTypeAnnotationsDirectoryItem = 0x2006,
};
// Byte-order fix-up and verification of a DEX image (excerpt; "..." marks elided code).
// Returns 0 on success and non-zero on failure.
int dexSwapAndVerify(u1* addr, size_t len)
{
...
if (okay) {
/*
* Look for the map. Swap it and then use it to find and swap
* everything else.
*/
if (pHeader->mapOff != 0) {
DexFile dexFile;
DexMapList* pDexMap = (DexMapList*) (addr + pHeader->mapOff);
/* Each step runs only if every previous step succeeded. */
okay = okay && swapMap(&state, pDexMap);
okay = okay && swapEverythingButHeaderAndMap(&state, pDexMap);
dexFileSetupBasicPointers(&dexFile, addr);
state.pDexFile = &dexFile;
okay = okay && crossVerifyEverything(&state, pDexMap);
} else {
ALOGE("ERROR: No map found; impossible to byte-swap and verify");
okay = false;
}
}
...
return !okay; // 0 == success
}
/*
 * Walks the map list in place: applies the SWAP_* macros to each entry, then
 * checks that entries are in strictly increasing offset order, lie inside the
 * file, are not duplicated, and that the required sections are present.
 * On success allocates state->pDataMap sized for all data-section items and
 * returns true; returns false on any violation.
 */
static bool swapMap(CheckState* state, DexMapList* pMap)
{
DexMapItem* item = pMap->list;
u4 count;
u4 dataItemCount = 0; // Total count of items in the data section.
u4 dataItemsLeft = state->pHeader->dataSize; // See use below.
u4 usedBits = 0; // Bit set: one bit per section
bool first = true;
u4 lastOffset = 0;
SWAP_FIELD4(pMap->size);
count = pMap->size;
const u4 sizeOfItem = (u4) sizeof(DexMapItem);
CHECK_LIST_SIZE(item, count, sizeOfItem);
while (count--) {
SWAP_FIELD2(item->type);
SWAP_FIELD2(item->unused);
SWAP_FIELD4(item->size);
SWAP_OFFSET4(item->offset);
/* Every entry after the first must start at a strictly larger offset. */
if (first) {
first = false;
} else if (lastOffset >= item->offset) {
ALOGE("Out-of-order map item: %#x then %#x",
lastOffset, item->offset);
return false;
}
if (item->offset >= state->pHeader->fileSize) {
ALOGE("Map item after end of file: %x, size %#x",
item->offset, state->pHeader->fileSize);
return false;
}
if (isDataSectionType(item->type)) {
u4 icount = item->size;
/*
* This sanity check on the data section items ensures that
* there are no more items than the number of bytes in
* the data section.
*/
if (icount > dataItemsLeft) {
ALOGE("Unrealistically many items in the data section: "
"at least %d", dataItemCount + icount);
return false;
}
dataItemsLeft -= icount;
dataItemCount += icount;
}
/* A zero mask is treated as failure (presumably an unknown type code). */
u4 bit = mapTypeToBitMask(item->type);
if (bit == 0) {
return false;
}
if ((usedBits & bit) != 0) {
ALOGE("Duplicate map section of type %#x", item->type);
return false;
}
/* Remember where the call-site and method-handle entries are. */
if (item->type == kDexTypeCallSiteIdItem) {
state->pCallSiteIds = item;
} else if (item->type == kDexTypeMethodHandleItem) {
state->pMethodHandleItems = item;
}
usedBits |= bit;
lastOffset = item->offset;
item++;
}
/* The header and the map list itself must always be listed. */
if ((usedBits & mapTypeToBitMask(kDexTypeHeaderItem)) == 0) {
ALOGE("Map is missing header entry");
return false;
}
if ((usedBits & mapTypeToBitMask(kDexTypeMapList)) == 0) {
ALOGE("Map is missing map_list entry");
return false;
}
/* Each ID section must be listed if the header gives it a non-zero offset or size. */
if (((usedBits & mapTypeToBitMask(kDexTypeStringIdItem)) == 0)
&& ((state->pHeader->stringIdsOff != 0)
|| (state->pHeader->stringIdsSize != 0))) {
ALOGE("Map is missing string_ids entry");
return false;
}
if (((usedBits & mapTypeToBitMask(kDexTypeTypeIdItem)) == 0)
&& ((state->pHeader->typeIdsOff != 0)
|| (state->pHeader->typeIdsSize != 0))) {
ALOGE("Map is missing type_ids entry");
return false;
}
if (((usedBits & mapTypeToBitMask(kDexTypeProtoIdItem)) == 0)
&& ((state->pHeader->protoIdsOff != 0)
|| (state->pHeader->protoIdsSize != 0))) {
ALOGE("Map is missing proto_ids entry");
return false;
}
if (((usedBits & mapTypeToBitMask(kDexTypeFieldIdItem)) == 0)
&& ((state->pHeader->fieldIdsOff != 0)
|| (state->pHeader->fieldIdsSize != 0))) {
ALOGE("Map is missing field_ids entry");
return false;
}
if (((usedBits & mapTypeToBitMask(kDexTypeMethodIdItem)) == 0)
&& ((state->pHeader->methodIdsOff != 0)
|| (state->pHeader->methodIdsSize != 0))) {
ALOGE("Map is missing method_ids entry");
return false;
}
if (((usedBits & mapTypeToBitMask(kDexTypeClassDefItem)) == 0)
&& ((state->pHeader->classDefsOff != 0)
|| (state->pHeader->classDefsSize != 0))) {
ALOGE("Map is missing class_defs entry");
return false;
}
state->pDataMap = dexDataMapAlloc(dataItemCount);
if (state->pDataMap == NULL) {
ALOGE("Unable to allocate data map (size %#x)", dataItemCount);
return false;
}
return true;
}
2
案例分析
案例一:fart
1、github
https://github.com/hanbinglengyue/FART
2、功能说明
{name:ooxx,method_idx:1830,offset:180516,code_item_len:24,ins:AQABAAEAAAB+oAMABAAAAHAQwAsAAA4A};
// Base64-encode the code item bytes; the helper is also passed base64lengthptr (presumably to receive the encoded length).
var base64ptr = funcBase64_encode(ptr(codeitemstartaddr), codeitemlength, ptr(base64lengthptr));
// Read the encoded text, using the integer read from base64lengthptr as its length.
var b64content = ptr(base64ptr).readCString(base64lengthptr.readInt());
// Release the buffer returned by the encoder.
funcFreeptr(ptr(base64ptr));
// Build one record of the .bin file: method index, code item offset and length, base64 instructions.
var content = "{name:ooxx,method_idx:" + dex_method_index_ + ",offset:" + dex_code_item_offset_ + ",code_item_len:" + codeitemlength + ",ins:" + b64content + "};";
3、使用
4、源码分析
def main():
    """Parse the dex file named by the global ``filename``.

    All the work happens inside the ``dex_parser`` constructor.
    """
    # Load the dex file.
    dex = dex_parser(filename)


if __name__ == "__main__":
    # Read the command-line arguments into filename and insfilename.
    init()
    methodTable.clear()
    # Load the .bin file (the one named by insfilename) and fill methodTable.
    # Each record carries: method name, method id, method offset,
    # code item length and the instructions.
    parseinsfile()
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("methodTable length:" + str(len(methodTable)))
    # Start processing.
    main()
def parseinsfile():
    """Fill ``methodTable`` from the dumped .bin file named by ``insfilename``.

    The file is a sequence of records of the form
    ``{name:ooxx,method_idx:1830,offset:180516,code_item_len:24,ins:<base64>};``.
    Each record becomes a ``CodeItem`` stored under its method index.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
        ValueError: if a numeric field of a record is not an integer.
    """
    global insfilename
    # The context manager closes the file even if read() raises.
    with open(insfilename) as insfile:
        content = insfile.read()
    # (.*?) is a non-greedy match, so each group stops at the next separator.
    insarray = re.findall(r"{name:(.*?),method_idx:(.*?),offset:(.*?),code_item_len:(.*?),ins:(.*?)}", content)
    # Walk every record that matches the format shown above.
    for eachins in insarray:
        # In practice the name is always the fixed placeholder "ooxx".
        methodname = eachins[0].replace(" ", "")
        number = int(eachins[1])
        # Parsed so a malformed offset still raises; CodeItem does not take it.
        offset = int(eachins[2])
        inssize = int(eachins[3])
        ins = eachins[4]
        tempmethod = CodeItem(number, methodname, inssize, ins)
        methodTable[number] = tempmethod  # register the method by its index
class dex_parser:
    """Parser for a dex/odex file (Python 2 code: uses xrange and byte strings)."""

    def __init__(self, filename):
        """Read ``filename``, build the string table and print every class.

        Args:
            filename: path of the dex (or odex) file to parse.
        """
        # Magic of a plain dex file.
        global DEX_MAGIC
        # Magic of an odex file.
        global DEX_OPT_MAGIC
        self.m_javaobject_id = 0
        # Path of the dex file.
        self.m_filename = filename
        self.m_fd = open(filename, "rb")
        self.m_content = self.m_fd.read()
        self.m_fd.close()
        self.m_dex_optheader = None
        self.m_class_name_id = {}
        self.string_table = []
        # An odex file gets its opt header parsed first and its dex header
        # read at offset 0x40; a plain dex has its header at offset 0.
        if self.m_content[0:4] == DEX_OPT_MAGIC:
            self.init_optheader(self.m_content)
            self.init_header(self.m_content, 0x40)
        elif self.m_content[0:4] == DEX_MAGIC:
            self.init_header(self.m_content, 0)
        # Offset and count of string_ids, taken from the dex header.
        bOffset = self.m_stringIdsOff
        if self.m_stringIdsSize > 0:
            # Walk the string id list.
            for i in xrange(0, self.m_stringIdsSize):
                # File offset of the i-th string data item.
                offset, = struct.unpack_from("I", self.m_content, bOffset + i * 4)
                if i == 0:
                    # Nothing to emit yet: a string is cut out once the
                    # offset of the following one is known.
                    start = offset
                else:
                    # Each string data item starts with a uleb128 length
                    # prefix; `skip` is how many bytes that prefix occupies.
                    skip, length = get_uleb128(self.m_content[start:start + 5])
                    # The previous string runs up to the byte before the next
                    # item, minus its trailing NUL.
                    self.string_table.append(self.m_content[start + skip:offset - 1])
                    start = offset
            # Last string: there is no following offset, so scan for its NUL.
            # Skip the real uleb128 prefix length instead of assuming one
            # byte, otherwise strings of 128+ characters keep a stray byte.
            skip, length = get_uleb128(self.m_content[start:start + 5])
            end = self.m_content.find(chr(0), start + skip)
            if end != -1:
                self.string_table.append(self.m_content[start + skip:end])
        # Walk the class defs and fill m_class_name_id (class name -> index).
        for i in xrange(0, self.m_classDefSize):
            str1 = self.getclassname(i)
            self.m_class_name_id[str1] = i
        # Walk the class defs again and print each class; merging the .bin
        # contents happens inside printf.
        for i in xrange(0, self.m_classDefSize):
            str1 = self.getclassname(i)
            dex_class(self, i).printf(self)
uleb128 的大致规则是:每个字节的低 7 位是有效数据,最高位是"后面还有字节"的标志。如果第一个字节的最高位是 1,则第二个字节也属于这个数;如果第二个字节的最高位也是 1,就继续读下一个字节;直到某个字节的最高位为 0 时结束。最后把各字节的低 7 位按低位在前的顺序依次左移 0、7、14……位拼接起来,就得到真实的数值。
def varint_encode(number):
    """Encode a non-negative integer as unsigned LEB128 bytes.

    Args:
        number: the integer to encode; must be >= 0.

    Returns:
        The encoded bytes, least-significant 7-bit group first.

    Raises:
        ValueError: if ``number`` is negative (an arithmetic right shift
            never reaches zero, so the loop would not terminate).
    """
    if number < 0:
        raise ValueError("varint_encode requires a non-negative integer")
    buf = b''
    while True:
        towrite = number & 0x7f
        number >>= 7
        if number:
            # More groups follow: set the continuation bit.
            buf += struct.pack("B", (towrite | 0x80))
        else:
            buf += struct.pack("B", towrite)
            break
    return buf
def varint_decode(buff):
    """Decode one unsigned LEB128 value from the start of ``buff``.

    Args:
        buff: an indexable sequence of bytes (bytes, bytearray, a list of
            ints, or a Python 2 byte string).

    Returns:
        The decoded integer, or "" when ``buff`` ends before a byte without
        the continuation bit is seen (the original failure value).
    """
    shift = 0
    result = 0
    idx = 0
    while True:
        # `>=`, not `>`: idx == len(buff) is already out of range.
        if idx >= len(buff):
            return ""
        i = buff[idx]
        if isinstance(i, str):
            # Indexing a byte string yields a 1-character str on Python 2.
            i = ord(i)
        idx += 1
        result |= (i & 0x7f) << shift
        shift += 7
        if not (i & 0x80):
            break
    return result
class dex_class:
    """One class_def entry of a dex file plus the member counts of its class data."""

    def __init__(self, dex_object, classid):
        """Read class_def number ``classid`` from ``dex_object``.

        Args:
            dex_object: parsed dex providing ``m_content``,
                ``m_classDefOffset`` and ``m_classDefSize``.
            classid: index of the class_def entry to read.

        Raises:
            IndexError: if ``classid`` is not below ``m_classDefSize``.
                (The original ``return ""`` made Python raise TypeError,
                because ``__init__`` must return None.)
        """
        if classid >= dex_object.m_classDefSize:
            raise IndexError("class_def index %d out of range (%d entries)"
                             % (classid, dex_object.m_classDefSize))
        # A class_def entry is eight consecutive 32-bit fields.
        offset = dex_object.m_classDefOffset + classid * struct.calcsize("8I")
        self.offset = offset
        (self.thisClass,
         self.modifiers,
         self.superClass,
         self.interfacesOff,
         self.sourceFileIdx,
         self.annotationsOff,
         self.classDataOff,
         self.staticValuesOff) = struct.unpack_from("8I", dex_object.m_content, offset)
        self.index = classid
        self.interfacesSize = 0
        if self.interfacesOff != 0:
            # The interface list starts with its 32-bit entry count.
            self.interfacesSize, = struct.unpack_from("I", dex_object.m_content, self.interfacesOff)
        if self.classDataOff != 0:
            # The class data starts with four uleb128 counts. A 5-byte window
            # (the same one printf uses) avoids copying the rest of the file.
            offset = self.classDataOff
            count, self.numStaticFields = get_uleb128(dex_object.m_content[offset:offset + 5])
            offset += count
            count, self.numInstanceFields = get_uleb128(dex_object.m_content[offset:offset + 5])
            offset += count
            count, self.numDirectMethods = get_uleb128(dex_object.m_content[offset:offset + 5])
            offset += count
            count, self.numVirtualMethods = get_uleb128(dex_object.m_content[offset:offset + 5])
        else:
            self.numStaticFields = 0
            self.numInstanceFields = 0
            self.numDirectMethods = 0
            self.numVirtualMethods = 0
# Prints the class (excerpt; "..." marks elided code). For each direct method
# that also has a record in methodTable, prints the code found in the dex and
# then the code rebuilt from the dumped record.
def printf(self,dex_object):
...
print "=========numDirectMethods[%d]=numVirtualMethods[%d]=numStaticMethods[0]========="%(self.numDirectMethods,self.numVirtualMethods)
method_idx = 0
# Walk the direct methods.
for i in xrange(0,self.numDirectMethods):
# Each encoded method is three uleb128 values: index delta, access flags, code offset.
n,method_idx_diff = get_uleb128(dex_object.m_content[offset:offset+5])
offset += n
n,access_flags = get_uleb128(dex_object.m_content[offset:offset+5])
offset += n
n,code_off = get_uleb128(dex_object.m_content[offset:offset+5])
offset += n
# method_idx is cumulative: each entry stores the difference from the previous index. E.g. if the first method has idx 0xd2 and the second stores 1, the second is really 0xd3.
method_idx += method_idx_diff
if code_off != 0:
methodname=dex_object.getmethodfullname(method_idx,True).replace("::",".").replace(" ","")
method=None
try:
# Look up the record loaded earlier from the .bin file.
method = methodTable[method_idx]
except Exception as e:
pass
if method != None:
# The .bin file has this method: first print the version found in the dex.
print "\nDirectMethod:" + dex_object.getmethodfullname(method_idx, True) + "\n"
try:
print "before repire method+++++++++++++++++++++++++++++++++++\n"
method_code(dex_object, code_off).printf(dex_object, "\t\t")
except Exception as e:
print e
# Then decode the record from the .bin file into a method code object and print it.
try:
bytearray_str = base64.b64decode(method.insarray)
print "after repire method++++++++++++++++++++++++++++++++++++\n"
repired_method_code(dex_object, bytearray_str).printf(dex_object, "\t\t")
except Exception as e:
print e
...
class repired_method_code:
    """A code item (DexCode layout) parsed from the bytes dumped in the .bin file."""

    dex_obj = None
    content = ""
    # Class-level default kept for compatibility; every instance gets its own
    # list in __init__ so try items are not shared between methods.
    trylist = []

    def __init__(self, dex_obj, content):
        """Parse ``content`` as a DexCode structure.

        Args:
            dex_obj: the parsed dex the code belongs to (passed on to tryitem).
            content: raw bytes of the code item, starting at registers_size.
        """
        offset = 0
        format = "H"
        self.dex_obj = dex_obj
        self.content = content
        # Fresh list per instance: appending to the class attribute would
        # accumulate the try items of every method ever parsed.
        self.trylist = []
        # The data is a DexCode structure, so read it field by field:
        # four 16-bit fields followed by two 32-bit fields.
        self.registers_size, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        self.ins_size, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        self.outs_size, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        self.tries_size, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        format = "I"
        self.debug_info_off, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        self.insns_size, = struct.unpack_from(format, content, offset)
        offset += struct.calcsize(format)
        # The instructions start here; insns_size counts 2-byte units.
        self.insns = offset
        offset += 2 * self.insns_size
        if self.insns_size % 2 == 1:
            # Odd number of code units: skip the 2 bytes of alignment padding.
            offset += 2
        if self.tries_size == 0:
            self.tries = 0
            self.handlers = 0
        else:
            # Each try item is 8 bytes; the handler list follows the last one.
            self.handlerlist_offset = offset + 8 * self.tries_size
            self.tries = offset
            for i in range(0, self.tries_size):
                temptryitem = tryitem(self.dex_obj, content, self.handlerlist_offset, offset + 8 * i)
                self.trylist.append(temptryitem)
            self.handlers = offset + self.tries_size * struct.calcsize("I2H")
案例二:dex2jar
1、github
2、功能说明
3、使用
4、源码分析
/** Command-line entry point of the d2j-dex2jar tool (excerpt; "..." marks elided code). */
@BaseCmd.Syntax(cmd = "d2j-dex2jar", syntax = "[options] <file0> [file1 ... fileN]", desc = "convert dex to jar")
public class Dex2jarCmd extends BaseCmd {
public static void main(String... args) {
new Dex2jarCmd().doMain(args);
}
...
}
/** Shared command driver (excerpt; "..." marks elided code); ends up calling doCommandLine(). */
public void doMain(String... args) {
...
doCommandLine();
...
}
/** Converts every remaining command-line argument from dex to jar (excerpt; "..." marks elided code). */
protected void doCommandLine() throws Exception {
...
for (String fileName : remainingArgs) {
// long baseTS = System.currentTimeMillis();
String baseName = getBaseName(new File(fileName).toPath());
// Default output name is "<baseName>-dex2jar.jar" in the current directory, unless an output path was given.
Path file = output == null ? currentDir.resolve(baseName + "-dex2jar.jar") : output;
System.err.println("dex2jar " + fileName + " -> " + file);
// BaseDexFileReader is the type that does all the dex parsing.
BaseDexFileReader reader = MultiDexFileReader.open(Files.readAllBytes(new File(fileName).toPath()));
// Collects exception information; null when exception handling is disabled.
BaksmaliBaseDexExceptionHandler handler = notHandleException ? null : new BaksmaliBaseDexExceptionHandler();
// The key conversion step: the chained calls set the conversion options, and the final to(file) runs the dex-to-jar conversion.
Dex2jar.from(reader).withExceptionHandler(handler).reUseReg(reuseReg).topoLogicalSort()
.skipDebug(!debugInfo).optimizeSynchronized(this.optmizeSynchronized).printIR(printIR)
.noCode(noCode).skipExceptions(skipExceptions).to(file);
// If exceptions were recorded, have the handler dump them to the error file.
if (!notHandleException) {
if (handler.hasException()) {
Path errorFile = exceptionFile == null ? currentDir.resolve(baseName + "-error.zip")
: exceptionFile;
System.err.println("Detail Error Information in File " + errorFile);
System.err.println(BaksmaliBaseDexExceptionHandler.REPORT_MESSAGE);
handler.dump(errorFile, orginalArgs);
}
}
// long endTS = System.currentTimeMillis();
// System.err.println(String.format("%.2f", (float) (endTS - baseTS) / 1000));
}
}
/**
 * Opens a reader over raw file bytes that are either a dex file or a zip/apk.
 *
 * For a zip, every entry whose name starts with "classes" and ends with ".dex"
 * is read (first occurrence of each name only), so an apk can be passed directly.
 *
 * @param data the complete file contents
 * @return a single-dex reader, or a MultiDexFileReader when the zip holds several dex files
 * @throws IOException if the data is too short, is neither dex nor zip, or the zip has no matching entry
 */
public static BaseDexFileReader open(byte[] data) throws IOException {
    // Fewer than three bytes cannot hold either magic.
    if (data.length < 3) {
        throw new IOException("File too small to be a dex/zip");
    }

    final String dexPrefix = new String(data, 0, 3, StandardCharsets.ISO_8859_1);
    if ("dex".equals(dexPrefix)) {// dex
        return new DexFileReader(data);
    }

    final String zipPrefix = new String(data, 0, 2, StandardCharsets.ISO_8859_1);
    if (!"PK".equals(zipPrefix)) {
        throw new IOException("the src file not a .dex or zip file");
    }

    // ZIP: collect the classes*.dex entries, sorted by entry name.
    TreeMap<String, DexFileReader> readersByEntry = new TreeMap<>();
    try (ZipFile zipFile = new ZipFile(data)) {
        for (ZipEntry entry : zipFile.entries()) {
            String entryName = entry.getName();
            boolean isClassesDex = entryName.startsWith("classes") && entryName.endsWith(".dex");
            if (!isClassesDex || readersByEntry.containsKey(entryName)) {
                continue; // not a dex entry, or a repeated name: only the first one counts
            }
            readersByEntry.put(entryName, new DexFileReader(toByteArray(zipFile.getInputStream(entry))));
        }
    }

    // Single-dex versus multi-dex handling.
    switch (readersByEntry.size()) {
    case 0:
        throw new IOException("Can not find classes.dex in zip file");
    case 1:
        return readersByEntry.firstEntry().getValue();
    default:
        return new MultiDexFileReader(readersByEntry.values());
    }
}
// Wraps the byte array in a ByteBuffer and delegates to the ByteBuffer constructor.
public DexFileReader(byte[] data) {
this(ByteBuffer.wrap(data));
}
// The core of the dex parsing: checks magic, version and byte order, reads the
// section sizes and offsets from the header, and sets up one buffer per section.
public DexFileReader(ByteBuffer in) {
in.position(0);
in = in.asReadOnlyBuffer().order(ByteOrder.BIG_ENDIAN);
int magic = in.getInt() & 0xFFFFFF00;
final int MAGIC_DEX = 0x6465780A & 0xFFFFFF00;// hex for 'dex ', ignore the 0A
final int MAGIC_ODEX = 0x6465790A & 0xFFFFFF00;// hex for 'dey ', ignore the 0A
// Tell dex from odex; odex is rejected with an exception.
if (magic == MAGIC_DEX) {
;
} else if (magic == MAGIC_ODEX) {
throw new DexException("Not support odex");
} else {
throw new DexException("not support magic.");
}
// From here on the remaining DexHeader fields are read.
int version = in.getInt() >> 8;
if (version < 0 || version < DEX_035) {
throw new DexException("not support version.");
}
this.dex_version = version;
in.order(ByteOrder.LITTLE_ENDIAN);
// skip uint checksum
// and 20 bytes signature
// and uint file_size
// and uint header_size 0x70
// i.e. advance past those fields by the number of bytes they occupy
skip(in, 4 + 20 + 4 + 4);
// Byte-order tag; anything other than ENDIAN_CONSTANT is rejected.
int endian_tag = in.getInt();
if (endian_tag != ENDIAN_CONSTANT) {
throw new DexException("not support endian_tag");
}
// skip uint link_size
// and uint link_off
// (skip these two fields as well)
skip(in, 4 + 4);
// Read the size and offset of every important section.
int map_off = in.getInt();
string_ids_size = in.getInt();
int string_ids_off = in.getInt();
type_ids_size = in.getInt();
int type_ids_off = in.getInt();
proto_ids_size = in.getInt();
int proto_ids_off = in.getInt();
field_ids_size = in.getInt();
int field_ids_off = in.getInt();
method_ids_size = in.getInt();
int method_ids_off = in.getInt();
class_defs_size = in.getInt();
int class_defs_off = in.getInt();
// skip uint data_size data_off
int call_site_ids_off = 0;
int call_site_ids_size = 0;
int method_handle_ids_off = 0;
int method_handle_ids_size = 0;
// Only for versions newer than DEX_037: scan the map list for the call-site and method-handle sections.
if (dex_version > DEX_037) {
in.position(map_off);
int size = in.getInt();
for (int i = 0; i < size; i++) {
int type = in.getShort() & 0xFFFF;
in.getShort(); // unused;
int item_size = in.getInt();
int item_offset = in.getInt();
switch (type) {
case TYPE_CALL_SITE_ID_ITEM:
call_site_ids_off = item_offset;
call_site_ids_size = item_size;
break;
case TYPE_METHOD_HANDLE_ITEM:
method_handle_ids_off = item_offset;
method_handle_ids_size = item_size;
break;
default:
break;
}
}
}
// These stay 0 unless the map scan above found the sections.
this.call_site_ids_size = call_site_ids_size;
this.method_handle_ids_size = method_handle_ids_size;
// Slice each ID section into its own buffer so later lookups are relative to the section start.
// Each length is the entry size in bytes multiplied by the number of entries.
stringIdIn = slice(in, string_ids_off, string_ids_size * 4);
typeIdIn = slice(in, type_ids_off, type_ids_size * 4);
protoIdIn = slice(in, proto_ids_off, proto_ids_size * 12);
fieldIdIn = slice(in, field_ids_off, field_ids_size * 8);
methoIdIn = slice(in, method_ids_off, method_ids_size * 8);
classDefIn = slice(in, class_defs_off, class_defs_size * 32);
// These two are empty unless the map scan above found the sections.
callSiteIdIn = slice(in, call_site_ids_off, call_site_ids_size * 4);
methodHandleIdIn = slice(in, method_handle_ids_off, method_handle_ids_size * 8);
// Independent little-endian views over the whole file, one per kind of data item; no position is set here.
in.position(0);
annotationsDirectoryItemIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
annotationSetItemIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
annotationItemIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
annotationSetRefListIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
classDataIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
codeItemIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
stringDataIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
encodedArrayItemIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
typeListIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
debugInfoIn = in.duplicate().order(ByteOrder.LITTLE_ENDIAN);
}
/**
 * Runs the translation into {@code file}.
 *
 * An existing directory receives the output directly; any other path is
 * opened through createZip and the output goes under its root "/".
 *
 * @param file output directory or archive path
 * @throws IOException if the translation or the archive handling fails
 */
public void to(Path file) throws IOException {
    boolean isExistingDirectory = Files.exists(file) && Files.isDirectory(file);
    if (isExistingDirectory) {
        doTranslate(file);
        return;
    }
    try (FileSystem zipFs = createZip(file)) {
        Path zipRoot = zipFs.getPath("/");
        doTranslate(zipRoot);
    }
}
/** Parses the dex into a DexFileNode (excerpt; "..." marks elided code). */
private void doTranslate(final Path dist) throws IOException {
...
DexFileNode fileNode = new DexFileNode();
try {
// reader is the dex reader whose header parsing is shown above; accept() walks the whole dex and fills fileNode.
reader.accept(fileNode, readerConfig | DexFileReader.IGNORE_READ_EXCEPTION);
} catch (Exception ex) {
exceptionHandler.handleFileException(ex);
}
...
// Per the original note, the elided code ends with .convertDex(fileNode, cvf), which turns fileNode into .class files that are then packed into the jar.
}
/**
 * Visits the whole dex file: reports the version, visits every class_def in
 * index order through the per-class overload, then signals the end.
 *
 * @param dv     visitor receiving the file contents
 * @param config reader configuration flags, passed through unchanged
 */
public void accept(DexFileVisitor dv, int config) {
    dv.visitDexFileVersion(this.dex_version);
    int classDefIndex = 0;
    while (classDefIndex < class_defs_size) {
        accept(dv, classDefIndex, config);
        classDefIndex++;
    }
    dv.visitEnd();
}
/** Reads class_def number classIdx and feeds the class to the visitor. */
public void accept(DexFileVisitor dv, int classIdx, int config) {
// Position at the entry: class_def entries are 32 bytes each.
classDefIn.position(classIdx * 32);
// Read the eight class_def fields in file order.
int class_idx = classDefIn.getInt();
int access_flags = classDefIn.getInt();
int superclass_idx = classDefIn.getInt();
int interfaces_off = classDefIn.getInt();
int source_file_idx = classDefIn.getInt();
int annotations_off = classDefIn.getInt();
int class_data_off = classDefIn.getInt();
int static_values_off = classDefIn.getInt();
String className = getType(class_idx);
// Hook for skipping classes by name (per the original note the default implementation always returns false; not visible here).
if(ignoreClass(className)) return;
String superClassName = getType(superclass_idx);
String[] interfaceNames = getTypeList(interfaces_off);
try {
// visit() hands the class header to the visitor (the DexFileNode built by the caller stores it).
DexClassVisitor dcv = dv.visit(access_flags, className, superClassName, interfaceNames);
if (dcv != null)// a null visitor means the class is skipped
{
// With the class_def offsets in hand, parse the class in detail.
acceptClass(dcv, source_file_idx, annotations_off, class_data_off, static_values_off, config);
dcv.visitEnd();
}
} catch (Exception ex) {
DexException dexException = new DexException(ex, "Error process class: [%d]%s", class_idx, className);
// With IGNORE_READ_EXCEPTION set the error is only reported; otherwise it is rethrown.
if (0 != (config & IGNORE_READ_EXCEPTION)) {
niceExceptionMessage(dexException, 0);
} else {
throw dexException;
}
}
}
/** Visits one class: source file, annotations, then fields and methods from the class data. */
private void acceptClass(DexClassVisitor dcv, int source_file_idx, int annotations_off, int class_data_off,
int static_values_off, int config) {
if ((config & SKIP_DEBUG) == 0) {
// Source file name
if (source_file_idx != -1) {
dcv.visitSource(this.getString(source_file_idx));
}
}
// field index -> offset of its annotations
Map<Integer, Integer> fieldAnnotationPositions;
// method index -> offset of its annotations
Map<Integer, Integer> methodAnnotationPositions;
// method index -> offset of its parameter annotations
Map<Integer, Integer> paramAnnotationPositions;
if ((config & SKIP_ANNOTATION) == 0) {
// Collect the annotations
fieldAnnotationPositions = new HashMap<Integer, Integer>();
methodAnnotationPositions = new HashMap<Integer, Integer>();
paramAnnotationPositions = new HashMap<Integer, Integer>();
// If there is an annotations directory, read it and fill the maps above.
if (annotations_off != 0) { // annotations_directory_item
annotationsDirectoryItemIn.position(annotations_off);
int class_annotations_off = annotationsDirectoryItemIn.getInt();
int field_annotation_size = annotationsDirectoryItemIn.getInt();
int method_annotation_size = annotationsDirectoryItemIn.getInt();
int parameter_annotation_size = annotationsDirectoryItemIn.getInt();
for (int i = 0; i < field_annotation_size; i++) {
int field_idx = annotationsDirectoryItemIn.getInt();
int field_annotations_offset = annotationsDirectoryItemIn.getInt();
fieldAnnotationPositions.put(field_idx, field_annotations_offset);
}
for (int i = 0; i < method_annotation_size; i++) {
int method_idx = annotationsDirectoryItemIn.getInt();
int method_annotation_offset = annotationsDirectoryItemIn.getInt();
methodAnnotationPositions.put(method_idx, method_annotation_offset);
}
for (int i = 0; i < parameter_annotation_size; i++) {
int method_idx = annotationsDirectoryItemIn.getInt();
int parameter_annotation_offset = annotationsDirectoryItemIn.getInt();
paramAnnotationPositions.put(method_idx, parameter_annotation_offset);
}
// Annotations on the class itself
if (class_annotations_off != 0) {
try {
read_annotation_set_item(class_annotations_off, dcv);
} catch (Exception e) {
throw new DexException("error on reading Annotation of class ", e);
}
}
}
} else {
fieldAnnotationPositions = null;
methodAnnotationPositions = null;
paramAnnotationPositions = null;
}
// Detailed class data
if (class_data_off != 0) {
ByteBuffer in = classDataIn;
in.position(class_data_off);
// number of static fields
int static_fields = (int) readULeb128i(in);
// number of instance fields
int instance_fields = (int) readULeb128i(in);
// number of direct methods
int direct_methods = (int) readULeb128i(in);
// number of virtual methods
int virtual_methods = (int) readULeb128i(in);
{
// lastIndex carries the previous index within a list; it restarts at 0 for each of the four lists.
int lastIndex = 0;
{
Object[] constant = null;
if ((config & SKIP_FIELD_CONSTANT) == 0) {
if (static_values_off != 0) {
constant = read_encoded_array_item(static_values_off);
}
}
for (int i = 0; i < static_fields; i++) {
Object value = null;
if (constant != null && i < constant.length) {
value = constant[i];
}
// Parse the field and pass it to the visitor
lastIndex = acceptField(in, lastIndex, dcv, fieldAnnotationPositions, value, config);
}
}
lastIndex = 0;
for (int i = 0; i < instance_fields; i++) {
// Parse the field and pass it to the visitor
lastIndex = acceptField(in, lastIndex, dcv, fieldAnnotationPositions, null, config);
}
lastIndex = 0;
boolean firstMethod = true;
for (int i = 0; i < direct_methods; i++) {
// Parse the method and pass it to the visitor
lastIndex = acceptMethod(in, lastIndex, dcv, methodAnnotationPositions, paramAnnotationPositions,
config, firstMethod);
firstMethod = false;
}
lastIndex = 0;
firstMethod = true;
for (int i = 0; i < virtual_methods; i++) {
// Parse the method and pass it to the visitor
lastIndex = acceptMethod(in, lastIndex, dcv, methodAnnotationPositions, paramAnnotationPositions,
config, firstMethod);
firstMethod = false;
}
}
}
}
/**
 * Resolves field number {@code id} from the field_ids section.
 *
 * Each entry is 8 bytes: an unsigned 16-bit owner type index, an unsigned
 * 16-bit field type index and a 32-bit name index.
 */
private Field getField(int id) {
    fieldIdIn.position(id * 8);
    int ownerTypeIndex = fieldIdIn.getShort() & 0xFFFF;
    int fieldTypeIndex = fieldIdIn.getShort() & 0xFFFF;
    int nameIndex = fieldIdIn.getInt();
    // Resolve in the same order as before: owner, name, type.
    String ownerName = getType(ownerTypeIndex);
    String fieldName = getString(nameIndex);
    String typeName = getType(fieldTypeIndex);
    return new Field(ownerName, fieldName, typeName);
}
/**
 * Reads one encoded field (uleb128 index delta and access flags) from the class
 * data, visits it together with its annotations, and returns its absolute index
 * so the caller can pass it back as lastIndex for the next field.
 */
private int acceptField(ByteBuffer in, int lastIndex, DexClassVisitor dcv,
Map<Integer, Integer> fieldAnnotationPositions, Object value, int config) {
int diff = (int) readULeb128i(in);
int field_access_flags = (int) readULeb128i(in);
// The stored value is a delta from the previous field index.
int field_id = lastIndex + diff;
// Resolve the field (owner, name, type)
Field field = getField(field_id);
// Pass the field to the visitor
// //////////////////////////////////////////////////////////////
DexFieldVisitor dfv = dcv.visitField(field_access_flags, field, value);
if (dfv != null) {
if ((config & SKIP_ANNOTATION) == 0) {
// Annotations attached to the field
Integer annotation_offset = fieldAnnotationPositions.get(field_id);
if (annotation_offset != null) {
try {
read_annotation_set_item(annotation_offset, dfv);
} catch (Exception e) {
throw new DexException(e, "while accept annotation in field:%s.", field.toString());
}
}
}
dfv.visitEnd();
}
// //////////////////////////////////////////////////////////////
return field_id;
}
/**
 * Resolves method number {@code id} from the method_ids section.
 *
 * Each entry is 8 bytes: an unsigned 16-bit owner type index, an unsigned
 * 16-bit prototype index and a 32-bit name index.
 */
private Method getMethod(int id) {
    methoIdIn.position(id * 8);
    int ownerTypeIndex = methoIdIn.getShort() & 0xFFFF;
    int protoIndex = methoIdIn.getShort() & 0xFFFF;
    int nameIndex = methoIdIn.getInt();
    // Resolve in the same order as before: owner, name, prototype.
    String ownerName = getType(ownerTypeIndex);
    String methodName = getString(nameIndex);
    return new Method(ownerName, methodName, getProto(protoIndex));
}
/**
 * Reads one encoded method (uleb128 index delta, access flags, code offset) from
 * the class data, visits it with its annotations and code, and returns its
 * absolute index for use as the next lastIndex (excerpt; "..." marks elided code).
 */
private int acceptMethod(ByteBuffer in, int lastIndex, DexClassVisitor cv, Map<Integer, Integer> methodAnnos,
Map<Integer, Integer> parameterAnnos, int config, boolean firstMethod) {
int offset = in.position();
int diff = (int) readULeb128i(in);
int method_access_flags = (int) readULeb128i(in);
int code_off = (int) readULeb128i(in);
// The stored value is a delta from the previous method index.
int method_id = lastIndex + diff;
Method method = getMethod(method_id);
...
try {
// Pass the method to the visitor
DexMethodVisitor dmv = cv.visitMethod(method_access_flags, method);
if (dmv != null) {
if ((config & SKIP_ANNOTATION) == 0) {
// Annotations on the method
Integer annotation_offset = methodAnnos.get(method_id);
if (annotation_offset != null) {
try {
read_annotation_set_item(annotation_offset, dmv);
} catch (Exception e) {
throw new DexException(e, "while accept annotation in method:%s.", method.toString());
}
}
// Annotations on the parameters
Integer parameter_annotation_offset = parameterAnnos.get(method_id);
if (parameter_annotation_offset != null) {
try {
read_annotation_set_ref_list(parameter_annotation_offset, dmv);
} catch (Exception e) {
throw new DexException(e, "while accept parameter annotation in method:%s.",
method.toString());
}
}
}
// A non-zero code offset means there is a code_item; go on to parse the instructions.
if (code_off != 0) {
boolean keep = true;
// With SKIP_CODE set, code is kept only for <clinit> and only when KEEP_CLINIT is also set.
if (0 != (SKIP_CODE & config)) {
keep = 0 != (KEEP_CLINIT & config) && method.getName().equals("<clinit>");
}
if(keep) {
DexCodeVisitor dcv = dmv.visitCode();
if (dcv != null) {
try {
// Parse the code_item and pass it to the visitor
acceptCode(code_off, dcv, config, (method_access_flags & DexConstants.ACC_STATIC) != 0,
method);
} catch (Exception e) {
throw new DexException(e, "while accept code in method:[%s] @%08x", method.toString(),
code_off);
}
}
}
}
dmv.visitEnd();
}
} catch (Exception e) {
throw new DexException(e, "while accept method:[%s]", method.toString());
}
return method_id;
}
/** Reads the code_item at code_off and feeds registers, try/catch, debug info and instructions to the visitor. */
/* package */void acceptCode(int code_off, DexCodeVisitor dcv, int config, boolean isStatic, Method method) {
ByteBuffer in = codeItemIn;
in.position(code_off);
// Read the code_item header fields in file order.
int registers_size = 0xFFFF & in.getShort();
in.getShort();// ins_size ushort
in.getShort();// outs_size ushort
int tries_size = 0xFFFF & in.getShort();
int debug_info_off = in.getInt();
// Length of the instruction stream
int insns = in.getInt();
// The instruction stream itself: the length counts 2-byte units, hence the * 2 for the byte array.
byte[] insnsArray = new byte[insns * 2];
in.get(insnsArray);
dcv.visitRegister(registers_size);
BitSet nextInsn = new BitSet();
Map<Integer, DexLabel> labelsMap = new TreeMap<Integer, DexLabel>();
Set<Integer> handlers = new HashSet<Integer>();
// Exception handling (try/catch) data
if (tries_size > 0) {
if ((insns & 0x01) != 0) {// skip padding
in.getShort();
}
if (0 == (config & SKIP_EXCEPTION)) {
findTryCatch(in, dcv, tries_size, insns, labelsMap, handlers);
}
}
// Debug info
if (debug_info_off != 0 && (0 == (config & SKIP_DEBUG))) {
DexDebugVisitor ddv = dcv.visitDebug();
if (ddv != null) {
read_debug_info(debug_info_off, registers_size, isStatic, method, labelsMap, ddv);
ddv.visitEnd();
}
}
BitSet badOps = new BitSet();
findLabels(insnsArray, nextInsn, badOps, labelsMap, handlers, method);
// Decode the instructions and pass them to the visitor
acceptInsn(insnsArray, dcv, nextInsn, badOps, labelsMap);
dcv.visitEnd();
}
疑问
新思路
先看看dex2jar的做法是:解析dex,dex数据全部展开解析为java结构体,与偏移无关了,然后转换.class的结构,写入文件,最后打包成jar。
fart中定义dex的完整结构体,fart解析完dex后,直接将数据保存在结构体中,达到了偏移无关了。然后在遍历bin的数据。将code_item数据替换。然后再将整个dex结构体重新解析生成一个新的dex。
就是给dex2jar新增一个功能。前面完全按照他的方式解析出fileNode,然后读取bin文件解析出code_item。接着替换fileNode中对应的code_item数据。最后再重新生成回dex文件。
看雪ID:misskings
https://bbs.pediy.com/user-home-659397.htm
*本文由看雪论坛 misskings 原创,转载请注明来自看雪社区
# 往期推荐
球分享
球点赞
球在看
点击“阅读原文”,了解更多!