Skip to content

Commit c7a0906

Browse files
committed
Support vector index
1 parent 395d6fb commit c7a0906

144 files changed

Lines changed: 9294 additions & 250 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ AliSQL is Alibaba's MySQL branch, forked from official MySQL and used extensivel
2121

2222
- **[DuckDB Storage Engine](./wiki/duckdb/duckdb.md)**:AliSQL integrates DuckDB as a native storage engine, allowing users to operate DuckDB with the same experience as MySQL. By leveraging AliSQL for rapid deployment of DuckDB service nodes, users can easily achieve lightweight analytical capabilities.
2323

24+
- **[Vector Storage](https://www.alibabacloud.com/help/en/rds/apsaradb-rds-for-mysql/vector-storage-1?spm=a2c63.p38356.help-menu-26090.d_3_3_0.6bb8d111D06xOW)**:AliSQL natively supports enterprise-grade vector processing for up to 16,383 dimensions. By integrating a highly optimized HNSW algorithm for high-performance Approximate Nearest Neighbor (ANN) search, AliSQL empowers users to build AI-driven applications—such as semantic search and recommendation systems—seamlessly using standard SQL interfaces.
25+
2426
## Roadmap
25-
- **[Vector Storage](https://www.alibabacloud.com/help/en/rds/apsaradb-rds-for-mysql/vector-storage-1?spm=a2c63.p38356.help-menu-26090.d_3_3_0.6bb8d111D06xOW)** *(planned)*:AliSQL natively supports enterprise-grade vector processing for up to 16,383 dimensions. By integrating a highly optimized HNSW algorithm for high-performance Approximate Nearest Neighbor (ANN) search, AliSQL empowers users to build AI-driven applications—such as semantic search and recommendation systems—seamlessly using standard SQL interfaces.
2627

2728
- **[DDL Optimization](https://www.alibabacloud.com/help/en/rds/apsaradb-rds-for-mysql/alisql-ddl-best-practices?spm=a2c63.p38356.help-menu-26090.d_2_8_0.1f7a28a5F1ZVeK)** *(planned)*:AliSQL delivers a faster, safer, and lighter DDL experience through innovations such as enhanced Instant DDL, parallel B+tree construction, a non-blocking lock mechanism, and real-time DDL apply—significantly improving schema change efficiency and virtually eliminating replication lag.
2829

@@ -88,6 +89,7 @@ AliSQL is based on MySQL, which is licensed under GPL-2.0. The DuckDB integratio
8889
## See Also
8990
- [AliSQL Release Notes](./wiki/changes-in-alisql-8.0.44.md)
9091
- [DuckDB Storage Engine in AliSQL](./wiki/duckdb/duckdb.md)
92+
- [Vector Index in AliSQL](./wiki/vidx/vidx_readme.md)
9193
- [MySQL 8.0 Documentation](https://dev.mysql.com/doc/refman/8.0/en/)
9294
- [MySQL 8.0 Github Repository](https://github.com/mysql/mysql-server)
9395
- [DuckDB Official Documentation](https://duckdb.org/docs/stable/)

config.h.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#cmakedefine HAVE_EXECINFO_H 1
4545
#cmakedefine HAVE_FPU_CONTROL_H 1
4646
#cmakedefine HAVE_GRP_H 1
47+
#cmakedefine HAVE_IMMINTRIN_H 1
4748
#cmakedefine HAVE_LANGINFO_H 1
4849
#cmakedefine HAVE_MALLOC_H 1
4950
#cmakedefine HAVE_NETINET_IN_H 1

configure.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ CHECK_INCLUDE_FILES (endian.h HAVE_ENDIAN_H)
195195
CHECK_INCLUDE_FILES (execinfo.h HAVE_EXECINFO_H)
196196
CHECK_INCLUDE_FILES (fpu_control.h HAVE_FPU_CONTROL_H)
197197
CHECK_INCLUDE_FILES (grp.h HAVE_GRP_H)
198+
CHECK_INCLUDE_FILES (immintrin.h HAVE_IMMINTRIN_H) # vidx
198199
CHECK_INCLUDE_FILES (langinfo.h HAVE_LANGINFO_H)
199200
CHECK_INCLUDE_FILES (malloc.h HAVE_MALLOC_H)
200201
CHECK_INCLUDE_FILES (netinet/in.h HAVE_NETINET_IN_H)

include/m_ctype.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,4 +782,14 @@ static inline bool is_supported_parser_charset(const CHARSET_INFO *cs) {
782782
return (cs->mbminlen == 1);
783783
}
784784

785+
/*
  Convenience wrapper: calls the hash_sort handler reached through
  ci->coll, forwarding ci, key, len, nr1 and nr2 unchanged.
  Nothing is returned; any result is delivered by the handler through
  the nr1/nr2 pointers.

  NOTE(review): nr1/nr2 are ulong* here, while my_hash_sort_bin declared
  in this header takes uint64*. Confirm both types are the same width on
  every supported platform.
*/
static inline void my_ci_hash_sort(CHARSET_INFO *ci, const uchar *key,
786+
size_t len, ulong *nr1, ulong *nr2) {
787+
(ci->coll->hash_sort)(ci, key, len, nr1, nr2);
788+
}
789+
790+
/*
  Prototype of my_hash_sort_bin, declared with C linkage.
  The cs parameter is marked [[maybe_unused]] in the declaration.
  NOTE(review): presumably the hash_sort handler of the binary collation;
  confirm against its definition.
*/
extern "C" {
791+
void my_hash_sort_bin(const CHARSET_INFO *cs [[maybe_unused]], const uchar *key,
792+
size_t len, uint64 *nr1, uint64 *nr2);
793+
} // extern "C"
794+
785795
#endif // M_CTYPE_INCLUDED

include/my_base.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,8 @@ enum ha_base_keytype {
520520
/* The combination of the above can be used for key type comparison. */
521521
#define HA_KEYFLAG_MASK \
522522
(HA_NOSAME | HA_PACK_KEY | HA_AUTO_KEY | HA_BINARY_PACK_KEY | HA_FULLTEXT | \
523-
HA_UNIQUE_CHECK | HA_SPATIAL | HA_NULL_ARE_EQUAL | HA_GENERATED_KEY)
523+
HA_UNIQUE_CHECK | HA_SPATIAL | HA_NULL_ARE_EQUAL | HA_GENERATED_KEY | \
524+
HA_VECTOR)
524525

525526
/** Fulltext index uses [pre]parser */
526527
#define HA_USES_PARSER (1 << 14)
@@ -555,6 +556,7 @@ enum ha_base_keytype {
555556

556557
constexpr const ulong HA_INDEX_USES_ENGINE_ATTRIBUTE{1UL << 20};
557558
constexpr const ulong HA_INDEX_USES_SECONDARY_ENGINE_ATTRIBUTE{1UL << 21};
559+
constexpr const ulong HA_VECTOR{1UL << 22};
558560

559561
/* These flags can be added to key-seg-flag */
560562

include/my_bit.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,45 @@ static inline uint my_bit_log2(ulong value) {
5151
return bit;
5252
}
5353

54+
/*
55+
my_bit_log2_xxx()
56+
57+
In the given value, find the highest bit set,
58+
which is the largest X that satisfies the condition: (2^X <= value).
59+
Can be used as a reverse operation for (1<<X), to find X.
60+
61+
Examples:
62+
- returns 0 for (1<<0)
63+
- returns 1 for (1<<1)
64+
- returns 2 for (1<<2)
65+
- returns 1 for 3, which has (1<<1) as the highest bit set.
66+
67+
Note, the behaviour of log2(0) is not defined.
68+
Let's return 0 for the input 0, for code simplicity.
69+
See the 000x branch. It covers both (1<<0) and 0.
70+
*/
71+
/*
  Position (0..3) of the highest set bit within the low four bits of value.
  Only the masks 0x0C, 0x08 and 0x02 are tested, so bits above 0x0F are
  ignored. Both 1 and 0 end up in the "000x" branch and yield 0.
*/
static inline constexpr uint my_bit_log2_hex_digit(uint8 value) {
72+
return value & 0x0C ? /*1100*/ (value & 0x08 ? /*1000*/ 3 : /*0100*/ 2) :
73+
/*0010*/ (value & 0x02 ? /*0010*/ 1 : /*000x*/ 0);
74+
}
75+
/*
  Position (0..7) of the highest set bit of an 8-bit value.
  If any bit of the high nibble (0xF0) is set, the high nibble is evaluated
  and 4 is added; otherwise the low nibble decides. Returns 0 for input 0.
*/
static inline constexpr uint my_bit_log2_uint8(uint8 value) {
76+
return value & 0xF0 ? my_bit_log2_hex_digit((uint8)(value >> 4)) + 4
77+
: my_bit_log2_hex_digit(value);
78+
}
79+
/*
  Position (0..15) of the highest set bit of a 16-bit value.
  If the high byte (0xFF00) is non-zero it is evaluated and 8 is added;
  otherwise the low byte decides. Returns 0 for input 0.
*/
static inline constexpr uint my_bit_log2_uint16(uint16 value) {
80+
return value & 0xFF00 ? my_bit_log2_uint8((uint8)(value >> 8)) + 8
81+
: my_bit_log2_uint8((uint8)value);
82+
}
83+
/*
  Position (0..31) of the highest set bit of a 32-bit value.
  If the high 16 bits (0xFFFF0000) are non-zero they are evaluated and 16 is
  added; otherwise the low 16 bits decide. Returns 0 for input 0.
*/
static inline constexpr uint my_bit_log2_uint32(uint32 value) {
84+
return value & 0xFFFF0000UL ? my_bit_log2_uint16((uint16)(value >> 16)) + 16
85+
: my_bit_log2_uint16((uint16)value);
86+
}
87+
/*
  Position (0..63) of the highest set bit of a 64-bit value.
  If the high 32 bits are non-zero they are evaluated and 32 is added;
  otherwise the low 32 bits decide. Returns 0 for input 0.
*/
static inline constexpr uint my_bit_log2_uint64(ulonglong value) {
88+
return value & 0xFFFFFFFF00000000ULL
89+
? my_bit_log2_uint32((uint32)(value >> 32)) + 32
90+
: my_bit_log2_uint32((uint32)value);
91+
}
92+
5493
static inline uint my_count_bits(ulonglong v) {
5594
#if SIZEOF_LONG_LONG > 4
5695
/* The following code is a bit faster on 16 bit machines than if we would

include/my_byteorder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,11 @@ inline uchar *store32be(uchar *ptr, uint32 val) {
322322
return pointer_cast<uchar *>(store32be(pointer_cast<char *>(ptr), val));
323323
}
324324

325+
/* convenience helpers */
326+
/*
  Returns the float produced by float4get() for the bytes at `from`.
  The untyped pointer is cast to const uchar* before the call, so callers
  can pass any buffer pointer without casting it themselves.
  NOTE(review): byte order and alignment handling are whatever float4get
  implements; confirm against its definition.
*/
static inline float get_float(const void *from) {
327+
return float4get(((const uchar *)from));
328+
}
329+
325330
#endif /* __cplusplus */
326331

327332
#endif /* MY_BYTEORDER_INCLUDED */

include/my_sys.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,11 +789,13 @@ extern bool my_init_dynamic_array(DYNAMIC_ARRAY *array, PSI_memory_key key,
789789

790790
#define dynamic_element(array, array_index, type) \
791791
((type)((array)->buffer) + (array_index))
792+
#define reset_dynamic(array) ((array)->elements = 0)
792793

793794
/* Some functions are still in use in C++, because HASH uses DYNAMIC_ARRAY */
794795
extern bool insert_dynamic(DYNAMIC_ARRAY *array, const void *element);
795796
extern void *alloc_dynamic(DYNAMIC_ARRAY *array);
796797
extern void delete_dynamic(DYNAMIC_ARRAY *array);
798+
extern void *pop_dynamic(DYNAMIC_ARRAY *);
797799

798800
extern bool init_dynamic_string(DYNAMIC_STRING *str, const char *init_str,
799801
size_t init_alloc);

include/vidx/SIMD.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#ifndef SIMD_INCLUDED
2+
#define SIMD_INCLUDED
3+
4+
/*
5+
MIT License
6+
7+
Copyright (c) 2023 Sasha Krassovsky
8+
9+
Permission is hereby granted, free of charge, to any person obtaining a copy
10+
of this software and associated documentation files (the "Software"), to deal
11+
in the Software without restriction, including without limitation the rights
12+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13+
copies of the Software, and to permit persons to whom the Software is
14+
furnished to do so, subject to the following conditions:
15+
16+
The above copyright notice and this permission notice shall be included in all
17+
copies or substantial portions of the Software.
18+
19+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25+
SOFTWARE.
26+
*/
27+
28+
// https://save-buffer.github.io/bloom_filter.html
29+
30+
/*
31+
Use gcc function multiversioning to optimize for a specific CPU with run-time
32+
detection. Works only for x86, for other architectures we provide only one
33+
implementation for now.
34+
*/
35+
/*
  Selects the function-attribute macros for the SIMD variants.
  DEFAULT_IMPLEMENTATION starts out defined but empty; the branches below
  may redefine or undefine it.
  HAVE_IMMINTRIN_H is only tested here, and this header includes no
  configuration header itself, so it must already be defined (or not) by
  whatever was included before this file.
*/
#define DEFAULT_IMPLEMENTATION
36+
#if __GNUC__ > 7
37+
#ifdef __x86_64__
38+
#ifdef HAVE_IMMINTRIN_H
39+
#include <immintrin.h>
40+
/* x86-64 with immintrin.h: tag the baseline variant as target("default")
   and provide an attribute for an AVX2 variant. */
#undef DEFAULT_IMPLEMENTATION
41+
#define DEFAULT_IMPLEMENTATION __attribute__((target("default")))
42+
#define AVX2_IMPLEMENTATION __attribute__((target("avx2,avx,fma")))
43+
/* The AVX-512 attribute is only defined when __GNUC__ > 9. */
#if __GNUC__ > 9
44+
#define AVX512_IMPLEMENTATION __attribute__((target("avx512f,avx512bw")))
45+
/* end of: __GNUC__ > 9 */
#endif
46+
/* end of: HAVE_IMMINTRIN_H */
#endif
47+
/* end of: __x86_64__ */
#endif
48+
#ifdef __aarch64__
49+
#include <arm_neon.h>
50+
/* aarch64: DEFAULT_IMPLEMENTATION is left undefined after this branch;
   only NEON_IMPLEMENTATION (empty) is defined. */
#undef DEFAULT_IMPLEMENTATION
51+
#define NEON_IMPLEMENTATION
52+
/* end of: __aarch64__ */
#endif
53+
/* end of: __GNUC__ > 7 */
#endif
54+
55+
#endif /* SIMD_INCLUDED */

0 commit comments

Comments
 (0)