其他
少写点 if-else 吧,它的效率有多低你知道吗?
The following article is from 程序喵大人 Author 程序喵大人
作者 |
首先看一段经典的代码,并统计它的执行时间:// test_predict.cc
#include <algorithm>
#include <ctime>
#include <iostream>
int main() {
const unsigned ARRAY_SIZE = 50000;
int data[ARRAY_SIZE];
const unsigned DATA_STRIDE = 256;
for (unsigned c = 0; c < ARRAY_SIZE; ++c) data[c] = std::rand() % DATA_STRIDE;
std::sort(data, data + ARRAY_SIZE);
{ // 测试部分
clock_t start = clock();
long long sum = 0;
for (unsigned i = 0; i < 100000; ++i) {
for (unsigned c = 0; c < ARRAY_SIZE; ++c) {
if (data[c] >= 128) sum += data[c];
}
}
double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
std::cout << elapsedTime << "\n";
std::cout << "sum = " << sum << "\n";
}
return 0;
}
~/test$ g++ test_predict.cc ;./a.out
7.95312
sum = 480124300000
// std::sort(data, data + ARRAY_SIZE);
~/test$ g++ test_predict.cc ;./a.out
24.2188
sum = 480124300000
取指:Fetch
译指:Decode
执行:execute
回写:Write-back
静态分支预测:听名字就知道,该策略不依赖执行环境,编译器在编译时就已经对各个分支做好了预测。 动态分支预测:即运行时预测,CPU 会根据分支被选择的历史记录进行预测,如果最近多次都走了这个路口,那 CPU 做出预测时会优先考虑这个路口。
int t = (data[c] - 128) >> 31;
sum += ~t & data[c];
#include <algorithm>
#include <ctime>
#include <iostream>
int main() {
const unsigned ARRAY_SIZE = 50000;
int data[ARRAY_SIZE];
const unsigned DATA_STRIDE = 256;
for (unsigned c = 0; c < ARRAY_SIZE; ++c) data[c] = std::rand() % DATA_STRIDE;
int lookup[DATA_STRIDE];
for (unsigned c = 0; c < DATA_STRIDE; ++c) {
lookup[c] = (c >= 128) ? c : 0;
}
std::sort(data, data + ARRAY_SIZE);
{ // 测试部分
clock_t start = clock();
long long sum = 0;
for (unsigned i = 0; i < 100000; ++i) {
for (unsigned c = 0; c < ARRAY_SIZE; ++c) {
// if (data[c] >= 128) sum += data[c];
sum += lookup[data[c]];
}
}
double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
std::cout << elapsedTime << "\n";
std::cout << "sum = " << sum << "\n";
}
return 0;
}
参考资料
http://matt33.com/2020/04/16/cpu-branch-predictor/
https://zhuanlan.zhihu.com/p/22469702
https://en.wikipedia.org/wiki/Branch_predictor
https://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-processing-an-unsorted-array