其他
多项式MBA原理及其在代码混淆中的应用
本文为看雪论坛优秀文章
看雪论坛作者ID:34r7hm4n
MATH WARNING: 本文涉及少量抽象代数知识,基本上都是网安专业信安数学必修课中学到的内容。推荐这篇文章《代数结构入门:群、环、域、向量空间》(https://zhuanlan.zhihu.com/p/21583674),总结得很好。
1
基本概念
定义集合Pm上的运算◦,◦即函数复合,例如f(x)◦g(x)=f(g(x))。集合Pm与运算◦构成一个群:
对于Pm中的多项式f(x),一定存在一个g(x),使得f(x)与g(x)满足f(x)◦g(x)=x或者说f(g(x))=x。g(x)可以通过以下公式来求解:
2
多项式求逆C语言实现
用到的有以下几个运算:求逆、求组合数、次方、相乘、求和、取反。这些运算都是在Z/(2^n)上的,也就是说运算结果都要模2^n。可以用flint2这个库来实现,代码如下:
#include "flint/fmpz_mod_poly.h"
#include "flint/flint.h"
#include <time.h>
#include <cstdio>
#include <cstdint>
#include <iostream>
using namespace std;
int factorial(int n){
int fc=1;
for(int i=1;i<=n;++i) fc *= i;
return fc;
}
int combo(int n,int m){
return factorial(n) / (factorial(m) * factorial(n-m));
}
fmpz** gen_poly(int degree){
fmpz_mod_ctx ctx;
fmpz n = 1LL << 32;
fmpz_mod_ctx_init(&ctx, &n);
fmpz_mod_poly_struct f, g;
fmpz_mod_poly_init(&f, &ctx);
fmpz_mod_poly_init(&g, &ctx);
fmpz m = degree;
fmpz *a = new fmpz[m + 1];
fmpz *b = new fmpz[m + 1];
fmpz a1_inv;
fmpz *A = new fmpz[m + 1];
fmpz *A_sum = new fmpz[m + 1];
a[0] = rand(), a[1] = rand() | 1;
for(int i = 2;i <= m;i ++){
a[i] = (rand() >> 16) << 16;
}
// Calculate a1_inv
fmpz_mod_inv(&a1_inv, a + 1, &ctx);
// Calculate A
fmpz_mod_pow_ui(A + m, &a1_inv, m, &ctx);
fmpz_mod_neg(A + m, A + m, &ctx);
fmpz_mod_mul(A + m, A + m, a + m, &ctx);
for(int k = m - 1; k >= 0;k --){
fmpz sum = 0;
for(int j = k + 1;j <= m;j ++){
fmpz tmp;
fmpz_mod_pow_ui(&tmp, a, j - k, &ctx);
fmpz_mod_mul(&tmp, &tmp, A + j, &ctx);
fmpz_mod_mul_ui(&tmp, &tmp, combo(j, k), &ctx);
fmpz_mod_add(&sum, &sum, &tmp, &ctx);
}
fmpz_mod_pow_ui(A + k, &a1_inv, k, &ctx);
fmpz_mod_mul(A + k, A + k, a + k, &ctx);
fmpz_mod_neg(A + k, A + k, &ctx);
fmpz_mod_sub(A + k, A + k, &sum, &ctx);
A_sum[k] = sum;
}
// Calculate bm
fmpz_mod_pow_ui(b + m, &a1_inv, m + 1, &ctx);
fmpz_mod_neg(b + m, b + m, &ctx);
fmpz_mod_mul(b + m, b + m, a + m, &ctx);
// Calculate bk
for(int k = 2;k < m;k ++){
fmpz_mod_pow_ui(b + k, &a1_inv, k + 1, &ctx);
fmpz_mod_mul(b + k, b + k, a + k, &ctx);
fmpz_mod_neg(b + k, b + k, &ctx);
fmpz tmp;
fmpz_mod_mul(&tmp, &a1_inv, A_sum + k, &ctx);
fmpz_mod_sub(b + k, b + k, &tmp, &ctx);
}
// Calculate b1
fmpz sum = 0;
for(int j = 2;j <= m;j ++){
fmpz tmp;
fmpz_mod_pow_ui(&tmp, a, j - 1, &ctx);
fmpz_mod_mul(&tmp, &tmp, A + j, &ctx);
fmpz_mod_mul_ui(&tmp, &tmp, j, &ctx);
fmpz_mod_add(&sum, &sum, &tmp, &ctx);
}
fmpz_mod_mul(&sum, &sum, &a1_inv, &ctx);
fmpz_mod_sub(b + 1, &a1_inv, &sum, &ctx);
// Calculate b0
b[0] = 0;
for(int j = 1;j <= m;j ++){
fmpz tmp;
fmpz_mod_pow_ui(&tmp, a, j, &ctx);
fmpz_mod_mul(&tmp, &tmp, b + j, &ctx);
fmpz_mod_add(b, b, &tmp, &ctx);
}
fmpz_mod_neg(b, b, &ctx);
fmpz **coeff = new fmpz*[2];
coeff[0] = a, coeff[1] = b;
delete[] A;
delete[] A_sum;
return coeff;
}
int main(){
int degree = 10;
srand(time(NULL));
fmpz** coeff = gen_poly(degree);
fmpz_mod_ctx ctx;
fmpz n = 1LL << 32;
fmpz_mod_ctx_init(&ctx, &n);
fmpz_mod_poly_struct f, g, res;
fmpz_mod_poly_init(&f, &ctx);
fmpz_mod_poly_init(&g, &ctx);
fmpz_mod_poly_init(&res, &ctx);
for(int i = 0;i <= degree;i ++){
fmpz_mod_poly_set_coeff_ui(&f, i, coeff[0][i], &ctx);
fmpz_mod_poly_set_coeff_ui(&g, i, coeff[1][i], &ctx);
}
fmpz_mod_poly_compose(&res, &g, &f, &ctx);
flint_printf("f(x) = "); fmpz_mod_poly_print_pretty(&f, "x", &ctx); flint_printf("\n");
flint_printf("g(x) = "); fmpz_mod_poly_print_pretty(&g, "x", &ctx); flint_printf("\n");
flint_printf("g(f(x)) = "); fmpz_mod_poly_print_pretty(&res, "x", &ctx); flint_printf("\n");
}
f(x) = 1756102656*x^10+947978240*x^9+368902144*x^8+1950089216*x^7+497156096*x^6+1583087616*x^5+178454528*x^4+202440704*x^3+932052992*x^2+421111353*x+593005452
g(x) = 2268332032*x^10+395247616*x^9+2160525312*x^8+2187591680*x^7+3999137792*x^6+833880064*x^5+806682624*x^4+1138688000*x^3+2095448064*x^2+3762448393*x+1736062996
g(f(x)) = x
fmpz_mod_ctx ctx;
fmpz n = 1LL << 32;
fmpz_mod_ctx_init(&ctx, &n);
#include "flint/fmpz_mod_poly.h"
#include "flint/flint.h"
#include "z3++.h"
#include <time.h>
#include <cstdio>
#include <cstdint>
#include <iostream>
using namespace std;
using namespace z3;
uint64_t inverse(uint64_t n){
context c;
solver s(c);
expr a = c.bv_val(n, 64);
expr a_inv = c.bv_const("a_inv", 64);
s.add(a * a_inv == 1);
s.check();
model m = s.get_model();
return m.eval(a_inv).get_numeral_uint64();
}
void gen_univariate_poly(uint64_t *a, uint64_t *b){
uint64_t a0, a1, b0, b1, a1_inv;
a0 = ((uint64_t)rand() << 32) | rand(), a1 = ((uint64_t)rand() << 32LL) | (rand() | 1);
// Calculate a1_inv
a1_inv = inverse(a1);
// Calculate b1
b1 = a1_inv;
// Calculate b0
b0 = -(b1 * a0);
uint64_t **coeff = new uint64_t*[2];
a[0] = a0, a[1] = a1, b[0] = b0, b[1] = b1;
}
3
混淆思路
生成一对互逆的一次多项式f(x)和g(x)
构造与x+y等价的线性MBA表达式E2=(x^y)+2*(x&y)(关于什么是线性MBA表达式见我的上一篇文章)
构造E3=g(f(E2)),由之前提到过的性质,g(f(E2))实际上就等于E2
用等价但更复杂E3表达式替代原x+y表达式
4
LLVM Pass实现
Github:
MBAObfuscation.cpp
MBAUtils.cpp
首先是把求逆多项式的代码移植一下,跟之前区别不大:
uint64_t inverse(uint64_t n, uint32_t bitWidth){
context c;
solver s(c);
expr a = c.bv_val(n, bitWidth);
expr a_inv = c.bv_const("a_inv", bitWidth);
s.add(a * a_inv == 1);
s.add(a_inv != 0);
s.check();
model m = s.get_model();
return m.eval(a_inv).get_numeral_uint64();
}
void generateUnivariatePoly(uint64_t *a, uint64_t *b, uint32_t bitWidth){
uint64_t a0, a1, b0, b1, a1_inv;
a0 = cryptoutils->get_uint64_t(), a1 = cryptoutils->get_uint64_t() | 1;
// Calculate a1_inv
a1_inv = inverse(a1, bitWidth);
// Calculate b1
b1 = a1_inv;
// Calculate b0
b0 = -(b1 * a0);
a[0] = a0, a[1] = a1, b[0] = b0, b[1] = b1;
}
Value* llvm::insertPolynomialMBA(Value *linearMBAExpr, BinaryOperator *insertBefore){
IRBuilder<> builder(insertBefore->getContext());
builder.SetInsertPoint(insertBefore);
Type *operandType = insertBefore->getOperand(0)->getType();
uint32_t bitWidth = operandType->getIntegerBitWidth();
uint64_t a[2], b[2];
generateUnivariatePoly(a, b, bitWidth);
Value *expr;
expr = builder.CreateMul(ConstantInt::get(operandType, b[1]), linearMBAExpr);
expr = builder.CreateAdd(expr, ConstantInt::get(operandType, b[0]));
expr = builder.CreateMul(ConstantInt::get(operandType, a[1]), expr);
expr = builder.CreateAdd(expr, ConstantInt::get(operandType, a[0]));
return expr;
}
void MBAObfuscation::substituteAdd(BinaryOperator *BI){
int64_t *terms = generateLinearMBA(TermsNumber);
terms[2] += 1;
terms[4] += 1;
Value *mbaExpr = insertPolynomialMBA(insertLinearMBA(terms, BI), BI);
BI->replaceAllUsesWith(mbaExpr);
}
5
混淆效果
#include <cstdio>
#include <cstring>
#include <cstdlib>
char *input;
char enc[100] = "\x86\x8a\x7d\x87\x93\x8b\x4d\x81\x80\x8a\
\x43\x7f\x49\x49\x86\x71\x7f\x62\x53\x69\x28\x9d";
void encrypt(unsigned char *dest, char *src){
int len = strlen(src);
for(int i = 0;i < len;i ++){
dest[i] = (src[i] + (32 - i)) ^ i;
}
}
//flag{s1mpl3_11vm_d3m0}
int main(int argc, char *argv[]){
printf("Welcome to LLVM world...\n");
if(argc <= 1){
printf("Input your flag as an argument.\n");
return 0;
}
input = argv[1];
printf("Your flag is: %s\n", input);
unsigned char dest[100] = {0};
encrypt(dest, input);
bool result = strlen(input) == 22 && !memcmp(dest, enc, 22);
if(result){
printf("Congratulations~\n");
}else{
printf("Sorry try again.\n");
}
}
看雪ID:34r7hm4n
https://bbs.pediy.com/user-home-910514.htm
# 往期推荐
1.Android netlink&svc 获取 Mac方法深入分析
球分享
球点赞
球在看
点击“阅读原文”,了解更多!