Skip to content

Commit 4cf8aa4

Browse files
committed
forceinline
1 parent 616c7d5 commit 4cf8aa4

6 files changed

Lines changed: 118 additions & 121 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ build/
99
!.gitignore
1010
CMakePresets.json
1111
CMakeSettings.json
12-
*.s
12+
*.s

CMakeLists.txt

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set(CMAKE_CXX_STANDARD 17)
55
set(CMAKE_CXX_STANDARD_REQUIRED ON)
66

77
# --- Compiler tuning ---
8-
set(ASAN_ENABLED OFF CACHE BOOL "Whether to enable ASan or not")
8+
set(SAN_ENABLED OFF CACHE BOOL "Whether to enable ASan or not")
99
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
1010
if (CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
1111
# clang-cl: forward GCC/Clang style constexpr flags via /clang:
@@ -24,24 +24,12 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
2424
-march=native -mtune=native
2525
-ftemplate-backtrace-limit=0
2626
)
27-
if (ASAN_ENABLED)
28-
add_compile_options(-fsanitize=address)
29-
add_link_options(-fsanitize=address)
30-
endif()
3127
endif()
3228
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
3329
add_compile_options(-fconstexpr-ops-limit=2000000000 -fconstexpr-depth=1024 -march=native -mtune=native -ftemplate-backtrace-limit=0)
34-
if (ASAN_ENABLED)
35-
add_compile_options(-fsanitize=address)
36-
add_link_options(-fsanitize=address)
37-
endif()
3830
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
3931
set(ARCH_FLAG "/arch:AVX2" CACHE STRING "MSVC architecture flag (/arch:SSE2, /arch:AVX, /arch:AVX2, /arch:AVX512)")
4032
add_compile_options(/constexpr:steps2000000000 /constexpr:depth1024 ${ARCH_FLAG})
41-
if (ASAN_ENABLED)
42-
add_compile_options(/fsanitize=address)
43-
add_link_options(/fsanitize=address)
44-
endif()
4533
endif()
4634

4735
add_compile_definitions(GENERATE_AT_RUNTIME)

bitboard.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#if defined(_MSC_VER)
55
#include <intrin.h>
66
#endif
7-
7+
#include <immintrin.h>
88
namespace chess {
99
// -------------------------------
1010
// constexpr fallbacks
@@ -43,7 +43,7 @@ constexpr int msb_constexpr(Bitboard x) noexcept {
4343
#if defined(__GNUG__) || defined(__clang__)
4444
[[gnu::const]]
4545
#endif
46-
inline constexpr int popcount(Bitboard x) noexcept {
46+
__FORCEINLINE constexpr int popcount(Bitboard x) noexcept {
4747
#if defined(__GNUG__) || defined(__clang__)
4848
if (!is_constant_evaluated())
4949
return __builtin_popcountll(x);
@@ -57,7 +57,7 @@ inline constexpr int popcount(Bitboard x) noexcept {
5757
#if defined(__GNUG__) || defined(__clang__)
5858
[[gnu::const]]
5959
#endif
60-
inline constexpr int lsb(Bitboard x) noexcept {
60+
__FORCEINLINE constexpr int lsb(Bitboard x) noexcept {
6161
#if defined(__GNUG__) || defined(__clang__)
6262
if (!is_constant_evaluated())
6363
return __builtin_ctzll(x);
@@ -74,7 +74,7 @@ inline constexpr int lsb(Bitboard x) noexcept {
7474
#if defined(__GNUG__) || defined(__clang__)
7575
[[gnu::const]]
7676
#endif
77-
inline constexpr int msb(Bitboard x) noexcept {
77+
__FORCEINLINE constexpr int msb(Bitboard x) noexcept {
7878
#if defined(__GNUG__) || defined(__clang__)
7979
if (!is_constant_evaluated())
8080
return 63 - __builtin_clzll(x);
@@ -91,13 +91,17 @@ inline constexpr int msb(Bitboard x) noexcept {
9191
// -------------------------------
9292
// destructive variants
9393
// -------------------------------
94-
inline int pop_lsb(Bitboard &b) noexcept {
94+
__FORCEINLINE int pop_lsb(Bitboard &b) noexcept {
9595
int c = lsb(b);
96+
#ifndef __BMI2__
9697
b &= b - 1;
98+
#else
99+
_blsr_u64(b);
100+
#endif
97101
return c;
98102
}
99103

100-
inline int pop_msb(Bitboard &b) noexcept {
104+
__FORCEINLINE int pop_msb(Bitboard &b) noexcept {
101105
int c = msb(b);
102106
b &= ~(1ULL << c);
103107
return c;

position.cpp

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ template <int Offset = 0> struct alignas(64) SplatTable {
2222
}
2323
};
2424

25-
inline constexpr SplatTable<> SPLAT_TABLE{};
26-
template <int Offset> inline constexpr SplatTable<Offset> SPLAT_PAWN_TABLE{};
25+
__FORCEINLINE constexpr SplatTable<> SPLAT_TABLE{};
26+
template <int Offset> __FORCEINLINE constexpr SplatTable<Offset> SPLAT_PAWN_TABLE{};
2727
// AVX-512 (32 lanes of uint16_t)
28-
inline static Move *write_moves(Move *moveList, uint32_t mask, __m512i vector) {
28+
__FORCEINLINE static Move *write_moves(Move *moveList, uint32_t mask, __m512i vector) {
2929
// Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4
3030
_mm512_storeu_si512(reinterpret_cast<__m512i *>(moveList), _mm512_maskz_compress_epi16(mask, vector));
3131
return moveList + popcount(mask);
3232
}
3333

34-
inline static Move *splat_moves(Move *moveList, Square from, Bitboard to_bb) {
34+
__FORCEINLINE static Move *splat_moves(Move *moveList, Square from, Bitboard to_bb) {
3535
const auto *table = reinterpret_cast<const __m512i *>(SPLAT_TABLE.data.data());
3636
__m512i fromVec = _mm512_set1_epi16(Move(from, SQUARE_ZERO).raw());
3737
// two 32-lane blocks (0..31, 32..63)
@@ -41,7 +41,7 @@ inline static Move *splat_moves(Move *moveList, Square from, Bitboard to_bb) {
4141
return moveList;
4242
}
4343

44-
template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitboard to_bb) {
44+
template <int offset> __FORCEINLINE static Move *splat_pawn_moves(Move *moveList, Bitboard to_bb) {
4545
const auto *table = reinterpret_cast<const __m512i *>(SPLAT_PAWN_TABLE<offset>.data.data());
4646
moveList = write_moves(moveList, static_cast<uint32_t>(to_bb >> 0), _mm512_load_si512(table + 0));
4747
moveList = write_moves(moveList, static_cast<uint32_t>(to_bb >> 32), _mm512_load_si512(table + 1));
@@ -62,8 +62,8 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
6262
// }
6363
//};
6464
//
65-
// inline constexpr SplatTable<> SPLAT_TABLE{};
66-
// template <int Offset> inline constexpr SplatTable<Offset> SPLAT_PAWN_TABLE{};
65+
// __FORCEINLINE constexpr SplatTable<> SPLAT_TABLE{};
66+
// template <int Offset> __FORCEINLINE constexpr SplatTable<Offset> SPLAT_PAWN_TABLE{};
6767
//
6868
// constexpr std::array<std::array<uint8_t, 16>, 256> build_shuffle_lut() {
6969
// std::array<std::array<uint8_t, 16>, 256> lut{};
@@ -99,7 +99,7 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
9999
// static ShuffleLutInitializer _shuffle_lut_init;
100100
//
101101
//// Compress 16×int16_t lanes from v according to mask, store contiguously, return #written
102-
// static inline int compressstore_epi16_avx2(int16_t* dst, __m256i v, uint16_t mask) {
102+
// static __FORCEINLINE int compressstore_epi16_avx2(int16_t* dst, __m256i v, uint16_t mask) {
103103
// __m128i lo = _mm256_castsi256_si128(v);
104104
// __m128i hi = _mm256_extracti128_si256(v, 1);
105105
//
@@ -122,12 +122,12 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
122122
// }
123123
//
124124
//// Same logical behavior as your AVX-512 write_moves()
125-
// inline Move* write_moves(Move* moveList, uint32_t mask, __m256i vector) {
125+
// __FORCEINLINE Move* write_moves(Move* moveList, uint32_t mask, __m256i vector) {
126126
// int n = compressstore_epi16_avx2(reinterpret_cast<int16_t*>(moveList), vector,
127127
// static_cast<uint16_t>(mask));
128128
// return moveList + n;
129129
// }
130-
// inline Move *write_moves(Move *moveList, uint32_t mask, __m256i lo_vec, __m256i hi_vec) {
130+
// __FORCEINLINE Move *write_moves(Move *moveList, uint32_t mask, __m256i lo_vec, __m256i hi_vec) {
131131
// uint16_t mask_lo = static_cast<uint16_t>(mask & 0xFFFF);
132132
// uint16_t mask_hi = static_cast<uint16_t>(mask >> 16);
133133
//
@@ -138,7 +138,7 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
138138
// }
139139
//
140140
//// ----------------- splat_moves AVX2 -----------------
141-
// inline Move *splat_moves(Move *moveList, uint16_t from, uint64_t to_bb) {
141+
// __FORCEINLINE Move *splat_moves(Move *moveList, uint16_t from, uint64_t to_bb) {
142142
// const uint16_t *base = SPLAT_TABLE.data.data();
143143
//
144144
// // load 4 blocks: 0..15, 16..31, 32..47, 48..63
@@ -159,7 +159,7 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
159159
// }
160160
//
161161
//// ----------------- splat_pawn_moves AVX2 -----------------
162-
// template <int Offset> inline Move *splat_pawn_moves(Move *moveList, uint64_t to_bb) {
162+
// template <int Offset> __FORCEINLINE Move *splat_pawn_moves(Move *moveList, uint64_t to_bb) {
163163
// const uint16_t *base = SPLAT_PAWN_TABLE<Offset>.data.data();
164164
//
165165
// __m256i t0 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(base + 0));
@@ -177,7 +177,7 @@ template <int offset> inline static Move *splat_pawn_moves(Move *moveList, Bitbo
177177
// }
178178

179179
#else
180-
template <Direction offset> inline static Move *splat_pawn_moves(Move *moveList, Bitboard to_bb) {
180+
template <Direction offset> __FORCEINLINE static Move *splat_pawn_moves(Move *moveList, Bitboard to_bb) {
181181
while (to_bb) {
182182
Square to = (Square)pop_lsb(to_bb);
183183
#if defined(_DEBUG) || !defined(NDEBUG)
@@ -191,7 +191,7 @@ template <Direction offset> inline static Move *splat_pawn_moves(Move *moveList,
191191
return moveList;
192192
}
193193

194-
inline static Move *splat_moves(Move *moveList, Square from, Bitboard to_bb) {
194+
__FORCEINLINE static Move *splat_moves(Move *moveList, Square from, Bitboard to_bb) {
195195
while (to_bb)
196196
*moveList++ = Move(from, (Square)pop_lsb(to_bb));
197197
return moveList;
@@ -250,7 +250,7 @@ template <typename PieceC, typename T> template <Color c> void _Position<PieceC,
250250
Bitboard occ_temp = occ();
251251
occ_temp ^= (1ULL << from) | ep_mask;
252252

253-
// inline attackers check
253+
// inline attackers check
254254
Bitboard atks = 0;
255255
// atks |= attacks::pawn(c, king_sq) & (pieces<PAWN, ~c>() &~ep_mask);
256256
// atks |= attacks::knight(king_sq) & pieces<KNIGHT, ~c>();
@@ -655,7 +655,7 @@ template <typename PieceC, typename T> template <bool Strict> void _Position<Pie
655655
}
656656
}
657657

658-
template <typename PieceC, typename T> template <bool RetAll> inline auto _Position<PieceC, T>::undoMove() -> std::conditional_t<RetAll, HistoryEntry<PieceC> &, void> {
658+
template <typename PieceC, typename T> template <bool RetAll> auto _Position<PieceC, T>::undoMove() -> std::conditional_t<RetAll, HistoryEntry<PieceC> &, void> {
659659
// Save only the move (we'll restore the full state next)
660660
const Move move = current_state.mv;
661661

@@ -1231,17 +1231,6 @@ template <typename PieceC, typename T> bool _Position<PieceC, T>::is_insufficien
12311231

12321232
return false;
12331233
}
1234-
template <typename PieceC, typename T> inline bool _Position<PieceC, T>::has_repeated() const {
1235-
auto idx = history.size() - 1;
1236-
int end = std::min(rule50_count(), current_state.pliesFromNull);
1237-
while (end-- >= 4) {
1238-
if (history[idx].repetition)
1239-
return true;
1240-
1241-
idx--;
1242-
}
1243-
return false;
1244-
}
12451234
template <typename PieceC, typename T> CastlingRights _Position<PieceC, T>::clean_castling_rights() const {
12461235
constexpr Bitboard cr_WOO = 1ULL << SQ_H1;
12471236
constexpr Bitboard cr_WOOO = 1ULL << SQ_A1;
@@ -1317,7 +1306,6 @@ template void _Position<PieceC, void>::refresh_attacks(); \
13171306
template uint64_t _Position<PieceC, void>::zobrist() const; \
13181307
template Move _Position<PieceC, void>::parse_uci(std::string) const; \
13191308
template Move _Position<PieceC, void>::push_uci(std::string); \
1320-
template bool _Position<PieceC, void>::has_repeated() const; \
13211309
template bool _Position<PieceC, void>::is_valid<false>() const; \
13221310
template bool _Position<PieceC, void>::is_valid<true>() const; \
13231311
template bool _Position<PieceC, void>::is_insufficient_material() const;

0 commit comments

Comments
 (0)