为了理解反汇编引擎而写的X86/X64反汇编引擎
本文为看雪论坛精华文章
看雪论坛作者ID:不化的雪
(1)Opcode:例如PUSH(注意缩写还有就是简写)一对多的放在类型说明里面
(2)Addressing Method + Operand Type:例如 Gv, Ev 一对多的单独作为一个类型
(3)Register Codes:例如 rBX/r11 一对多的单独作为一个类型
(4)Superscripts:例如 1A、i64 视情况而定,也是比较少。不适合做单独的类型
(5)Opcode Extensions Group Number: 例如 Grp 2 (intel的前面可能会有对该组的描述性文字如 Immediate)
(6)Prefix:例如Operand Size (Prefix)
(7)REX Prefix:例如REX.X 类型说明
(8)描述性文字:例如word、double-word or quad-word register with rAX
(9)距离:例如near 类型说明,或者其他
(10)说明位于其他表:例如3-byte escape,VEX+2byte
(11)空白:直接类型说明,为0,与通用的一样,不过后面的opcode没有name
(12)异常代码:例如UD0, 太少不放进去结构体了
1、看位于哪个OPCODE表
2、取出指令,寻址类型还有操作数类型
3、注意复合寄存器表示方式(例如 rBX/r11)的解析与操作数类型相关
4、写死在opcode里面的寄存器需要单独表示,其表示方式与第5点中的不同
5、根据寻址类型还有操作数类型确定后面是否有 ModR/M字节、SIB字节、偏移量字节(1、2、4、8字节)、立即数字节(1、2、4、8字节)。
6、依次解码opcode还有操作数1-3
// One opcode-map entry: the opcode string plus, for each of up to three
// operands, its addressing method and operand type.
typedef struct OPCODE_STRUCT OPCODE_STRUCT;
typedef struct OPCODE_STRUCT *POPCODE_STRUCT;

struct OPCODE_STRUCT
{
    const char *opcode;       // opcode string appended to the decoded text
    int AddressingMethod1st;  // operand 1: addressing method
    int OperandType1st;       // operand 1: operand type
    int AddressingMethod2nd;  // operand 2: addressing method
    int OperandType2nd;       // operand 2: operand type
    int AddressingMethod3rd;  // operand 3: addressing method
    int OperandType3rd;       // operand 3: operand type
    int SelfType;             // kind of this opcode byte; the decoder switches on it (0 = generic)
};
//Resolve the opcode-map entry for the current byte: append its opcode string to Result and
//record each operand's addressing method and operand type so later stages can handle them uniformly.
//Start from the table defaults for all three operands.
decode_struct.AddressingMethod[0] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].AddressingMethod1st;
decode_struct.OperandType[0] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].OperandType1st;
decode_struct.AddressingMethod[1] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].AddressingMethod2nd;
decode_struct.OperandType[1] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].OperandType2nd;
decode_struct.AddressingMethod[2] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].AddressingMethod3rd;
decode_struct.OperandType[2] = popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].OperandType3rd;
switch (popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].SelfType)
{
case 0://generic entry
    //Generic entries need nothing beyond the defaults above: take the opcode string as-is.
    //The bytes that follow are decoded later from the recorded addressing methods and
    //operand types, once the pointer has moved on.
    Result = Result + popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].opcode;
    break;
case PUPO:
    //With REX.B set, the entry's second addressing method takes the place of the first.
    //In both cases the second slot is then cleared, leaving a single operand.
    if (REX_B_Flag)
        decode_struct.AddressingMethod[0] = decode_struct.AddressingMethod[1];
    decode_struct.AddressingMethod[1] = UNCON;
    //Append the opcode string once. The previous "Result + Result + ..." duplicated
    //whatever text had already been accumulated in Result.
    Result = Result + popcodemap[(int)HighNibble(*POpcode)][(int)LowNibble(*POpcode)].opcode;
    break;
//篇幅有限,只是贴一部分代码
//Determine whether a ModRM byte follows the opcode and, if so, whether a SIB byte
//and a displacement (and of what size) follow it.
for (int icount = 0; icount < 3; icount++)
{
    switch (decode_struct.AddressingMethod[icount])
    {
    case C: case D: case G: case P: case S: case V:
    case E: case M: case N: case Q: case R: case U: case W:
    {
        //Every addressing method listed above is encoded through a ModRM byte.
        const int modField = MOD(*(POpcode + 1));
        const int rmField = RM(*(POpcode + 1));

        ModRM_Flag = true;

        //R/M == 100b with Mod != 11b means a SIB byte follows.
        if (0x4 == rmField && 0x3 != modField)
            SIB_Flag = true;

        switch (modField)
        {
        case 0x0:
            //Mod == 00b carries a 4-byte displacement only when R/M == 101b,
            //or when a SIB byte is present and its base field is 101b.
            if (0x5 == rmField || (true == SIB_Flag && 0x5 == BASE(*(POpcode + 2))))
                idispcount = 4;
            break;
        case 0x1:
            //Mod == 01b: 1-byte displacement
            idispcount = 1;
            break;
        case 0x2:
            //Mod == 10b: 4-byte displacement
            idispcount = 4;
            break;
        default:
            //Mod == 11b: idispcount is left untouched
            break;
        }
        break;
    }
    default:
        break;
    }
}
// General registers in legacy and compatibility modes, or in 64-bit mode without REX.
// Indexed as [size row][register number]; rows are 8-bit, 16-bit, 32-bit.
const char *GPRs32[3][8] =
{
    // 8-bit
    { "AL",  "CL",  "DL",  "BL",  "AH",  "CH",  "DH",  "BH"  },
    // 16-bit
    { "AX",  "CX",  "DX",  "BX",  "SP",  "BP",  "SI",  "DI"  },
    // 32-bit
    { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI" }
};
// General registers in 64-bit mode with REX.
// Indexed as [size row][register number]; rows are 8-bit, 16-bit, 32-bit, 64-bit.
const char *GPRs64[4][16] =
{
    // 8-bit
    { "AL",  "CL",  "DL",  "BL",  "SPL", "BPL", "SIL", "DIL",
      "R8B", "R9B", "R10B", "R11B", "R12B", "R13B", "R14B", "R15B" },
    // 16-bit
    { "AX",  "CX",  "DX",  "BX",  "SP",  "BP",  "SI",  "DI",
      "R8W", "R9W", "R10W", "R11W", "R12W", "R13W", "R14W", "R15W" },
    // 32-bit
    { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
      "R8D", "R9D", "R10D", "R11D", "R12D", "R13D", "R14D", "R15D" },
    // 64-bit
    { "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
      "R8",  "R9",  "R10",  "R11",  "R12",  "R13",  "R14",  "R15"  }
};
//Operands whose register is named directly by the opcode-map entry.
switch (decode_struct.AddressingMethod[icount])
{
case rAX: case rCX: case rDX: case rBX:
case rSP: case rBP: case rSI: case rDI:
case r8:  case r9:  case r10: case r11:
case r12: case r13: case r14: case r15:
{
    const int regIndex = decode_struct.AddressingMethod[icount];
    const int sizeRow = decode_struct.OperandType[icount];
    const char *regName;

    if (UNCON != sizeRow)
    {
        //The entry states a size: it selects the row of the 64-bit table directly.
        regName = GPRs64[sizeRow][regIndex];
    }
    else if (MODE32 == Mode)
    {
        //No size stated, 32-bit mode: the operand-size prefix picks 16-bit over 32-bit.
        regName = GPRs32[OPSizeFlag ? _16BIT : _32BIT][regIndex];
    }
    else if (REX_W_Flag || REX_B_Flag)
    {
        //No size stated, 64-bit mode: REX.W or REX.B selects the 64-bit names.
        regName = GPRs64[_64BIT][regIndex];
    }
    else
    {
        //No size stated, 64-bit mode, neither REX bit set: same rule as 32-bit mode.
        regName = GPRs64[OPSizeFlag ? _16BIT : _32BIT][regIndex];
    }

    Result = Result + regName;
    break;
}
1、根据当前的模式还有就是有没有操作数大小覆盖前缀来确定(没有大小描述的情况)
2、直接解码返回(有大小描述的情况)
// ModRM REG-field names when no REX prefix is present.
// Indexed as [register class row][REG field value].
const char *ModRM_REG0[10][8] =
{
    // reg8
    { "AL",   "CL",   "DL",   "BL",   "AH",   "CH",   "DH",   "BH"   },
    // reg16
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   },
    // reg32
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  },
    // reg64
    { "RAX",  "RCX",  "RDX",  "RBX",  "RSP",  "RBP",  "RSI",  "RDI"  },
    // mmx
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" },
    // xmm
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" },
    // ymm
    { "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7" },
    // sReg
    { "ES",   "CS",   "SS",   "DS",   "FS",   "GS",   "invalid", "invalid" },
    // cReg
    { "CR0",  "CR1",  "CR2",  "CR3",  "CR4",  "CR5",  "CR6",  "CR7"  },
    // dReg
    { "DR0",  "DR1",  "DR2",  "DR3",  "DR4",  "DR5",  "DR6",  "DR7"  }
};
// ModRM REG-field names when a REX prefix is present and REX.R is 0.
// Indexed as [register class row][REG field value].
const char *ModRM_REG10[10][8] =
{
    // reg8
    { "AL",   "CL",   "DL",   "BL",   "SPL",  "BPL",  "SIL",  "DIL"  },
    // reg16
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   },
    // reg32
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  },
    // reg64
    { "RAX",  "RCX",  "RDX",  "RBX",  "RSP",  "RBP",  "RSI",  "RDI"  },
    // mmx
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" },
    // xmm
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" },
    // ymm
    { "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7" },
    // sReg
    { "ES",   "CS",   "SS",   "DS",   "FS",   "GS",   "invalid", "invalid" },
    // cReg
    { "CR0",  "CR1",  "CR2",  "CR3",  "CR4",  "CR5",  "CR6",  "CR7"  },
    // dReg
    { "DR0",  "DR1",  "DR2",  "DR3",  "DR4",  "DR5",  "DR6",  "DR7"  }
};
// ModRM REG-field names when a REX prefix is present and REX.R is 1.
// Indexed as [register class row][REG field value].
const char *ModRM_REG11[10][8] =
{
    // reg8
    { "R8B",  "R9B",  "R10B",  "R11B",  "R12B",  "R13B",  "R14B",  "R15B"  },
    // reg16
    { "R8W",  "R9W",  "R10W",  "R11W",  "R12W",  "R13W",  "R14W",  "R15W"  },
    // reg32
    { "R8D",  "R9D",  "R10D",  "R11D",  "R12D",  "R13D",  "R14D",  "R15D"  },
    // reg64
    { "R8",   "R9",   "R10",   "R11",   "R12",   "R13",   "R14",   "R15"   },
    // mmx
    { "MMX0", "MMX1", "MMX2",  "MMX3",  "MMX4",  "MMX5",  "MMX6",  "MMX7"  },
    // xmm
    { "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15" },
    // ymm
    { "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15" },
    // sReg
    { "ES",   "CS",   "SS",    "DS",    "FS",    "GS",    "invalid", "invalid" },
    // cReg
    { "CR8",  "CR9",  "CR10",  "CR11",  "CR12",  "CR13",  "CR14",  "CR15"  },
    // dReg
    { "DR8",  "DR9",  "DR10",  "DR11",  "DR12",  "DR13",  "DR14",  "DR15"  }
};
//Decode the REG field of a ModRM byte and append the register name, with one space
//on each side, to Result.
//  PModRM      pointer to the ModRM byte
//  Result      output text; the register name is appended to it
//  REX_Flag    a REX prefix is present (consulted only when Mode is not MODE32)
//  REX_W_Flag  REX.W; selects the reg64 row for GPRS when Mode is not MODE32
//  REX_R_Flag  REX.R; selects between the REX.R = 0 and REX.R = 1 tables
//  Mode        MODE32 or anything else (treated as 64-bit)
//  OPSizeFlag  operand-size override; selects the reg16 row over reg32 for GPRS
//  DecodeType  GPRS to derive the row from mode and flags, otherwise the row index itself
static void DecodeREG(BYTE* PModRM, CString &Result, bool REX_Flag, bool REX_W_Flag, bool REX_R_Flag, int Mode, bool OPSizeFlag, int DecodeType)
{
    const bool longMode = (MODE32 != Mode);

    //Table selection: the REX tables apply only outside MODE32 and only with a REX prefix.
    const char *(*regTable)[0x8] = ModRM_REG0;
    if (longMode && REX_Flag)
        regTable = REX_R_Flag ? ModRM_REG11 : ModRM_REG10;

    //Row selection: an explicit DecodeType is used as the row; GPRS is resolved here.
    int row = DecodeType;
    if (GPRS == DecodeType)
    {
        if (longMode && REX_W_Flag)
            row = REG64;
        else
            row = OPSizeFlag ? REG16 : REG32;
    }

    Result = Result + " " + regTable[row][REG(*PModRM)] + " ";
}
// ModRM R/M field encoding, 32-bit and 64-bit addressing.
// 32-bit, Mod != 11b: register names indexed as [Mod][R/M].
// Slots left as "" have no plain register name in this table.
const char *ModRM_RM32[3][8] =
{
    // Mod = 00b
    { "EAX", "ECX", "EDX", "EBX", "", "",    "ESI", "EDI" },
    // Mod = 01b
    { "EAX", "ECX", "EDX", "EBX", "", "EBP", "ESI", "EDI" },
    // Mod = 10b
    { "EAX", "ECX", "EDX", "EBX", "", "EBP", "ESI", "EDI" }
};
// 64-bit, REX.B = 0, Mod != 11b: register names indexed as [Mod][R/M].
// Slots left as "" have no plain register name in this table.
const char *ModRM_RM64_0[3][8] =
{
    // Mod = 00b
    { "RAX", "RCX", "RDX", "RBX", "", "",    "RSI", "RDI" },
    // Mod = 01b
    { "RAX", "RCX", "RDX", "RBX", "", "RBP", "RSI", "RDI" },
    // Mod = 10b
    { "RAX", "RCX", "RDX", "RBX", "", "RBP", "RSI", "RDI" }
};
// 64-bit, REX.B = 1, Mod != 11b: register names indexed as [Mod][R/M].
// Slots left as "" have no plain register name in this table.
const char *ModRM_RM64_1[3][8] =
{
    // Mod = 00b
    { "R8", "R9", "R10", "R11", "", "",    "R14", "R15" },
    // Mod = 01b
    { "R8", "R9", "R10", "R11", "", "R13", "R14", "R15" },
    // Mod = 10b
    { "R8", "R9", "R10", "R11", "", "R13", "R14", "R15" }
};
// 32-bit, Mod = 11b: register names indexed as [register class row][R/M].
const char *ModRM_RM32_11[5][8] =
{
    // reg8
    { "AL",   "CL",   "DL",   "BL",   "AH",   "CH",   "DH",   "BH"   },
    // reg16
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   },
    // reg32
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  },
    // mmx
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" },
    // xmm
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" }
};
// 64-bit, REX.B = 0, Mod = 11b: register names indexed as [register class row][R/M].
const char *ModRM_RM64_110[7][8] =
{
    // reg8
    { "AL",   "CL",   "DL",   "BL",   "SPL",  "BPL",  "SIL",  "DIL"  },
    // reg16
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   },
    // reg32
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  },
    // reg64
    { "RAX",  "RCX",  "RDX",  "RBX",  "RSP",  "RBP",  "RSI",  "RDI"  },
    // mmx
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" },
    // xmm
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" },
    // ymm
    { "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7" },
};
// 64-bit, REX.B = 1, Mod = 11b: register names indexed as [register class row][R/M].
const char *ModRM_RM64_111[7][8] =
{
    // reg8
    { "R8B",  "R9B",  "R10B",  "R11B",  "R12B",  "R13B",  "R14B",  "R15B"  },
    // reg16
    { "R8W",  "R9W",  "R10W",  "R11W",  "R12W",  "R13W",  "R14W",  "R15W"  },
    // reg32
    { "R8D",  "R9D",  "R10D",  "R11D",  "R12D",  "R13D",  "R14D",  "R15D"  },
    // reg64
    { "R8",   "R9",   "R10",   "R11",   "R12",   "R13",   "R14",   "R15"   },
    // mmx
    { "MMX0", "MMX1", "MMX2",  "MMX3",  "MMX4",  "MMX5",  "MMX6",  "MMX7"  },
    // xmm
    { "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15" },
    // ymm
    { "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15" },
};
//解码代码太长,不贴了。请看附件源代码DecodeModRM函数
先通过位数(模式)、REX.B和Mod来确定二维数组
1、Mod ≠ 11b,直接通过[Mod][R/M]就可以返回,通过Mod与R/M确定有没有SIB需要特殊处理,有Disp就解析出来。
2、Mod = 11b,通用寄存器需要确定大小(reg8~reg64)作为[具体的选项],mmx xmm ymm不需要。不需要解析SIB和Disp。
// 32-bit SIB decoding table, indexed as [scale][index].
// Column 4 is "" in every row: that index value has no register text here.
// The explicit '=' matches the other lookup tables in this file and keeps the
// definition valid for compilers that do not accept brace-initialization without it.
const char *ScaledIndex32[0x4][0x8] =
{
    {"EAX",  "ECX",  "EDX",  "EBX",  "","EBP",  "ESI",  "EDI"},
    {"EAX*2","ECX*2","EDX*2","EBX*2","","EBP*2","ESI*2","EDI*2"},
    {"EAX*4","ECX*4","EDX*4","EBX*4","","EBP*4","ESI*4","EDI*4"},
    {"EAX*8","ECX*8","EDX*8","EBX*8","","EBP*8","ESI*8","EDI*8"}
};
// 64-bit SIB decoding table for REX.X = 0, indexed as [scale][index].
// Column 4 is "" in every row: that index value has no register text here.
// The explicit '=' matches the other lookup tables in this file and keeps the
// definition valid for compilers that do not accept brace-initialization without it.
const char *ScaledIndex64_0[0x4][0x8] =
{
    {"RAX",  "RCX",  "RDX",  "RBX",  "","RBP",  "RSI",  "RDI"},
    {"RAX*2","RCX*2","RDX*2","RBX*2","","RBP*2","RSI*2","RDI*2"},
    {"RAX*4","RCX*4","RDX*4","RBX*4","","RBP*4","RSI*4","RDI*4"},
    {"RAX*8","RCX*8","RDX*8","RBX*8","","RBP*8","RSI*8","RDI*8"}
};
// 64-bit SIB decoding table for REX.X = 1, indexed as [scale][index].
// Unlike the REX.X = 0 table, every index value (including 4, R12) has register text.
// Column 6 previously read "RR14" in all four rows; it is the register R14.
// The explicit '=' matches the other lookup tables in this file and keeps the
// definition valid for compilers that do not accept brace-initialization without it.
const char *ScaledIndex64_1[0x4][0x8] =
{
    {"R8",  "R9",  "R10",  "R11",  "R12",  "R13",  "R14",  "R15"},
    {"R8*2","R9*2","R10*2","R11*2","R12*2","R13*2","R14*2","R15*2"},
    {"R8*4","R9*4","R10*4","R11*4","R12*4","R13*4","R14*4","R15*4"},
    {"R8*8","R9*8","R10*8","R11*8","R12*8","R13*8","R14*8","R15*8"}
};
case E:
{
    //The operand is in its memory form unless the ModRM Mod field is 11b.
    const bool isMemoryForm = (0x3 != MOD(*(POpcode + 1)));

    //A memory operand is preceded by the StrPrefix1 text and ':' when that text is not empty.
    //NOTE(review): assumes StrPrefix1 is a string class whose != compares contents - confirm.
    if (isMemoryForm && "" != StrPrefix1)
        Result = Result + " " + StrPrefix1 + ":";

    //The final DecodeModRM argument depends on the operand type and on memory vs. register form.
    switch (decode_struct.OperandType[icount])
    {
    case v:
        DecodeModRM((POpcode + 1), Result, REX_W_Flag, REX_X_Flag, REX_B_Flag, ModRM_Flag, SIB_Flag, idispcount, Mode, OPSizeFlag, ADSizeFlag,
                    isMemoryForm ? 0 : GPRS);
        break;
    case b:
        DecodeModRM((POpcode + 1), Result, REX_W_Flag, REX_X_Flag, REX_B_Flag, ModRM_Flag, SIB_Flag, idispcount, Mode, OPSizeFlag, ADSizeFlag,
                    isMemoryForm ? _BYTEP : REG8);
        break;
    default:
        break;
    }
    break;
}
看雪ID:不化的雪
https://bbs.pediy.com/user-620577.htm
推荐文章++++
好书推荐