| Issue |
173398
|
| Summary |
Clang++ HUGE misoptimisation if `constexpr` added to the function
|
| Labels |
clang
|
| Assignees |
|
| Reporter |
socketpair
|
```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include <bit>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <system_error>
#include <utility>
#include <vector>
namespace
{
// One trie node as findLPM reads it from the mapped file: the file's bytes are
// reinterpreted as an array of these, so field order and widths must not change.
struct NodeType
{
// Bitmap masked and popcounted by findLPM to pick a slot relative to leaves_base.
uint64_t leaves_bitmap;
// Bit i set => findLPM descends into a child node for 6-bit chunk value i.
uint64_t children_bitmap;
// Added to the leaf popcount to index the file viewed as an array of uint32_t.
uint64_t leaves_base;
// Added to the child popcount to index the file viewed as an array of NodeType.
uint64_t children_base;
};
/// Byte-order helper (the name suggests network-to-host conversion).
///
/// @param a 32-bit value to convert.
/// @return `a` unchanged when the native byte order is big-endian, otherwise
///         `a` with its bytes reversed.
uint32_t ntoh(uint32_t a)
{
    // The condition is a compile-time constant, so only one arm survives.
    constexpr bool native_is_big = (std::endian::native == std::endian::big);
    return native_is_big ? a : std::byteswap(a);
}
// Passes `value` through an empty `asm volatile` statement that lists it as a
// read-write register operand ("+r"), then returns it. main() wraps the lookup
// argument in this on every iteration — presumably so the optimizer cannot
// treat the argument as a known constant and fold or hoist the call.
auto black_box(uint32_t value)
{
// Emits no instructions; only tells the compiler `value` may have been modified.
asm volatile("" : "+r"(value));
return value;
}
}
// Looks up `addr` in the bitmap trie stored at `m_array` (presumably a
// longest-prefix-match lookup, per the name).
//
// `m_array` is viewed both as an array of NodeType (for child nodes) and as an
// array of uint32_t (for leaf values). Starting at node 0, each iteration takes
// the next STRIDE (6) bits of the byte-order-converted address, most
// significant first, and either descends to a child or returns a leaf value.
//
// The `constexpr` on this `[[gnu::noinline]]` function is the subject of the
// report. The body calls the non-constexpr ntoh() and uses reinterpret_cast,
// so it cannot actually be evaluated in a constant expression.
//
// NOTE(review): the loop bound allows depth = 30 and 36, where
// `m_total_addr_bits - STRIDE - depth` (unsigned arithmetic) wraps around and
// the shift count exceeds the width of `addr`, which is undefined behaviour.
// Presumably the data never has children that deep — confirm against the file
// format. The clang unroll count (32 / 6 + 1 = 6) is also one less than the 7
// iterations the loop condition permits.
[[gnu::noinline]] constexpr uint32_t findLPM(uint32_t addr, const unsigned char* m_array)
{
// Root node is the first NodeType in the buffer.
const NodeType* node = reinterpret_cast<const NodeType*>(m_array);
addr = ntoh(addr);
constexpr auto m_total_addr_bits = sizeof(uint32_t) * 8;
constexpr std::size_t STRIDE = 6;
#if defined(__clang__)
#pragma clang loop unroll_count(m_total_addr_bits / STRIDE + 1)
#elif defined(__GNUC__)
#pragma GCC unroll 0
#endif
for (std::size_t depth = 0; depth < (m_total_addr_bits + STRIDE); depth += STRIDE) {
// One-hot 64-bit mask selecting the slot for the current 6-bit chunk of addr.
const auto bit = uint64_t { 1 } << ((addr >> (m_total_addr_bits - STRIDE - depth)) & ((uint32_t { 1 } << STRIDE) - 1));
if (node->children_bitmap & bit) {
// Child index = children_base + number of set child bits below this slot.
node = &reinterpret_cast<const NodeType*>(m_array)[node->children_base + std::popcount(node->children_bitmap & (bit - 1))];
} else {
// Leaf index = leaves_base + number of set leaf bits at or below this slot.
return reinterpret_cast<const uint32_t*>(m_array)[node->leaves_base + std::popcount(node->leaves_bitmap & (bit | (bit - 1)))];
}
}
// The loop is expected to always return from the leaf branch.
std::unreachable();
}
/// Benchmark driver for findLPM.
///
/// Maps `compressed.bin` (in the current directory) read-only, calls findLPM
/// 500,000,000 times on one fixed address that is passed through black_box on
/// every iteration, then prints the throughput in millions of lookups per
/// second followed by the last lookup result.
///
/// @throws std::system_error (uncaught, so the process terminates) when
///         open, fstat, mmap or clock_gettime fails.
int main()
{
    const int fd = ::open("compressed.bin", O_RDONLY);
    if (fd == -1)
        throw std::system_error(errno, std::system_category(), "open failed");

    struct stat st {};
    if (::fstat(fd, &st) == -1)
        throw std::system_error(errno, std::system_category(), "fstat failed");

    const auto map_len = static_cast<std::size_t>(st.st_size);
    void* const mapping = ::mmap(nullptr, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
    // Carry errno in the exception, like every other failure path here.
    if (mapping == MAP_FAILED)
        throw std::system_error(errno, std::system_category(), "mmap failed");
    // The mapping remains valid after the descriptor is closed.
    ::close(fd);
    const auto* const data = static_cast<const unsigned char*>(mapping);

    constexpr uint32_t ip = 925653069; // conv("77.88.44.55");
    constexpr size_t iterations = 500000000ull;

    struct timespec ts1 {};
    struct timespec ts2 {};
    if (clock_gettime(CLOCK_MONOTONIC, &ts1) == -1)
        throw std::system_error(errno, std::system_category(), "clock_gettime()");

    // Initialised so the final print never reads an indeterminate value.
    uint32_t out = 0;
    for (std::size_t i = 0; i < iterations; i++) {
        out = findLPM(black_box(ip), data);
    }

    if (clock_gettime(CLOCK_MONOTONIC, &ts2) == -1)
        throw std::system_error(errno, std::system_category(), "clock_gettime()");

    // iterations / (ns / 1e9) / 1e6 == iterations * 1000 / ns
    auto diff_nsec = (ts2.tv_sec * 1000000000ull + ts2.tv_nsec) - (ts1.tv_sec * 1000000000ull + ts1.tv_nsec);
    std::cout << "Speed (Mlp/sec): " << iterations * 1000ull / diff_nsec << '\n';
    std::cout << "Just to tell compiler the result is used: " << out << '\n';

    ::munmap(mapping, map_len);
}
```
> `[[gnu::noinline]] constexpr uint32_t findLPM(uint32_t addr, const unsigned char* m_array)`
Removing `constexpr` here *or* placing the function into an anonymous namespace fixes the problem.
How to reproduce:
```
#!/bin/bash
set -e -u -x
g++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_gcc --save-temps
clang++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_clang --save-temps
./as_gcc
./as_clang
```
gives:
```
+ g++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_gcc --save-temps
+ clang++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_clang --save-temps
+ ./as_gcc
Speed (Mlp/sec): 7923
Just to tell compiler the result is used: 42
+ ./as_clang
Speed (Mlp/sec): 166
Just to tell compiler the result is used: 42
```
That is a significant performance drop! But if I change the source as explained above (remove `constexpr`, or move the function into the anonymous namespace), I get:
```
+ g++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_gcc --save-temps
+ clang++ bug.cpp -std=c++23 -march=native -Wall -Wextra -O3 -o as_clang --save-temps
+ ./as_gcc
Speed (Mlp/sec): 7837
Just to tell compiler the result is used: 42
+ ./as_clang
Speed (Mlp/sec): 3968
Just to tell compiler the result is used: 42
```
```
$ LANG=C g++ --version
g++ (GCC) 15.2.1 20251211 (Red Hat 15.2.1-5)
$ LANG=C clang++ --version
clang version 21.1.7 (Fedora 21.1.7-1.fc43)
Target: x86_64-redhat-linux-gnu
```
In order to run the program you need `compressed.bin`. I am attaching it here as a .zip file.
[a.zip](https://github.com/user-attachments/files/24316011/a.zip)
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs