Skip to content

Commit db663af

Browse files
authored
feat: add C API (#12)
1 parent 53ee492 commit db663af

File tree

12 files changed

+838
-10
lines changed

12 files changed

+838
-10
lines changed

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ jobs:
4545
- name: Create singleheader.zip
4646
run: |
4747
cd build/singleheader
48-
zip singleheader.zip merve.h merve.cpp
48+
# merve_c.cpp is listed in amalgamate.py's ALLCFILES (amalgamated sources); the
# standalone C API file copied into build/singleheader is merve_c.h, so ship that.
zip singleheader.zip merve.h merve.cpp merve_c.h
4949
mv singleheader.zip ../../singleheader/
50-
cp merve.h merve.cpp ../../singleheader/
50+
cp merve.h merve.cpp merve_c.h ../../singleheader/
5151
5252
- name: Create release
5353
run: gh release upload "$RELEASE_TAG" singleheader/*

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ if(NOT MERVE_COVERAGE AND NOT EMSCRIPTEN)
131131
endif()
132132

133133
install(
134-
FILES include/merve.h
134+
FILES include/merve.h include/merve_c.h
135135
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
136136
COMPONENT merve_development
137137
)

README.md

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ A fast C++ lexer for extracting named exports from CommonJS modules. This librar
99
- **Source Locations**: Each export includes a 1-based line number for tooling integration
1010
- **Unicode Support**: Properly unescapes JavaScript string literals including `\u{XXXX}` and surrogate pairs
1111
- **Optional SIMD Acceleration**: Can use [simdutf](https://github.com/simdutf/simdutf) for faster string operations
12+
- **C API**: Full C interface (`merve_c.h`) for use from C, FFI, or other languages
1213
- **No Dependencies**: Single-header distribution available (simdutf is optional)
1314
- **Cross-Platform**: Works on Linux, macOS, and Windows
1415

@@ -31,6 +32,7 @@ target_link_libraries(your_target PRIVATE lexer::lexer)
3132
### Single Header
3233

3334
Copy `singleheader/merve.h` and `singleheader/merve.cpp` to your project.
35+
The C API header `singleheader/merve_c.h` is also included in the distribution.
3436

3537
## Usage
3638

@@ -130,6 +132,95 @@ const std::optional<lexer_error>& get_last_error();
130132

131133
Returns the last parse error, if any.
132134

135+
## C API
136+
137+
merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or any language that can call C functions. The C API is compiled into the merve library alongside the C++ implementation.
138+
139+
### C API Usage
140+
141+
```c
142+
#include "merve_c.h"
143+
#include <stdio.h>
#include <string.h> /* strlen() is used below to compute the source length */
144+
145+
int main(void) {
146+
const char* source = "exports.foo = 1;\nexports.bar = 2;\n";
147+
148+
merve_analysis result = merve_parse_commonjs(source, strlen(source));
149+
150+
if (merve_is_valid(result)) {
151+
size_t count = merve_get_exports_count(result);
152+
printf("Found %zu exports:\n", count);
153+
for (size_t i = 0; i < count; i++) {
154+
merve_string name = merve_get_export_name(result, i);
155+
uint32_t line = merve_get_export_line(result, i);
156+
printf(" - %.*s (line %u)\n", (int)name.length, name.data, line);
157+
}
158+
} else {
159+
printf("Parse error: %d\n", merve_get_last_error());
160+
}
161+
162+
merve_free(result);
163+
return 0;
164+
}
165+
```
166+
167+
Output:
168+
```
169+
Found 2 exports:
170+
- foo (line 1)
171+
- bar (line 2)
172+
```
173+
174+
### C API Reference
175+
176+
#### Types
177+
178+
| Type | Description |
179+
|------|-------------|
180+
| `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. |
181+
| `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. |
182+
| `merve_version_components` | Struct with `major`, `minor`, `revision` fields. |
183+
184+
#### Functions
185+
186+
| Function | Description |
187+
|----------|-------------|
188+
| `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). |
189+
| `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. |
190+
| `merve_free(result)` | Free a parse result. NULL-safe. |
191+
| `merve_get_exports_count(result)` | Number of named exports found. |
192+
| `merve_get_reexports_count(result)` | Number of re-export specifiers found. |
193+
| `merve_get_export_name(result, index)` | Get export name at index. Returns `{NULL, 0}` on error. |
194+
| `merve_get_export_line(result, index)` | Get 1-based line number of export. Returns 0 on error. |
195+
| `merve_get_reexport_name(result, index)` | Get re-export specifier at index. Returns `{NULL, 0}` on error. |
196+
| `merve_get_reexport_line(result, index)` | Get 1-based line number of re-export. Returns 0 on error. |
197+
| `merve_get_last_error()` | Last error code (`MERVE_ERROR_*`), or -1 if no error. |
198+
| `merve_get_version()` | Version string (e.g. `"1.0.1"`). |
199+
| `merve_get_version_components()` | Version as `{major, minor, revision}`. |
200+
201+
#### Error Constants
202+
203+
| Constant | Value | Description |
204+
|----------|-------|-------------|
205+
| `MERVE_ERROR_UNEXPECTED_ESM_IMPORT` | 10 | Found ESM `import` declaration |
206+
| `MERVE_ERROR_UNEXPECTED_ESM_EXPORT` | 11 | Found ESM `export` declaration |
207+
| `MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META` | 9 | Found `import.meta` |
208+
| `MERVE_ERROR_UNTERMINATED_STRING_LITERAL` | 6 | Unclosed string literal |
209+
| `MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING` | 5 | Unclosed template literal |
210+
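| `MERVE_ERROR_UNTERMINATED_REGEX` | 8 | Unclosed regular expression |
| `MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS` | 7 | Unclosed regular expression character class |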
211+
| `MERVE_ERROR_UNEXPECTED_PAREN` | 1 | Unexpected `)` |
212+
| `MERVE_ERROR_UNEXPECTED_BRACE` | 2 | Unexpected `}` |
213+
| `MERVE_ERROR_UNTERMINATED_PAREN` | 3 | Unclosed `(` |
214+
| `MERVE_ERROR_UNTERMINATED_BRACE` | 4 | Unclosed `{` |
215+
| `MERVE_ERROR_TEMPLATE_NEST_OVERFLOW` | 12 | Template literal nesting too deep |
216+
217+
#### Lifetime Rules
218+
219+
- The `merve_analysis` handle must be freed with `merve_free()`.
220+
- `merve_string` values returned by accessors are valid as long as the handle has not been freed.
221+
- For exports backed by a `string_view` (most identifiers), the original source buffer must also remain valid.
222+
- All functions are NULL-safe: passing NULL returns safe defaults (false, 0, `{NULL, 0}`).
223+
133224
## Supported Patterns
134225
135226
### Direct Exports
@@ -243,8 +334,7 @@ cmake --build .
243334
### Running Tests
244335

245336
```bash
246-
cmake --build . --target real_world_tests
247-
./tests/real_world_tests
337+
ctest --test-dir build
248338
```
249339

250340
### Build Options

include/merve_c.h

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/**
2+
* @file merve_c.h
3+
* @brief Includes the C definitions for merve. This is a C file, not C++.
4+
*/
5+
#ifndef MERVE_C_H
6+
#define MERVE_C_H
7+
8+
#include <stdbool.h>
9+
#include <stddef.h>
10+
#include <stdint.h>
11+
12+
/**
13+
* @brief Non-owning string reference.
14+
*
15+
* The data pointer is NOT null-terminated. Always use the length field.
16+
*
17+
* The data is valid as long as:
18+
* - The merve_analysis handle that produced it has not been freed.
19+
* - For string_view-backed exports: the original source buffer is alive.
20+
*/
21+
typedef struct {
22+
const char* data;
23+
size_t length;
24+
} merve_string;
25+
26+
/**
27+
* @brief Opaque handle to a CommonJS parse result.
28+
*
29+
* Created by merve_parse_commonjs(). Must be freed with merve_free().
30+
*/
31+
typedef void* merve_analysis;
32+
33+
/**
34+
* @brief Version number components.
35+
*/
36+
typedef struct {
37+
int major;
38+
int minor;
39+
int revision;
40+
} merve_version_components;
41+
42+
/* Error codes corresponding to lexer::lexer_error values. */
43+
#define MERVE_ERROR_TODO 0
44+
#define MERVE_ERROR_UNEXPECTED_PAREN 1
45+
#define MERVE_ERROR_UNEXPECTED_BRACE 2
46+
#define MERVE_ERROR_UNTERMINATED_PAREN 3
47+
#define MERVE_ERROR_UNTERMINATED_BRACE 4
48+
#define MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING 5
49+
#define MERVE_ERROR_UNTERMINATED_STRING_LITERAL 6
50+
#define MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS 7
51+
#define MERVE_ERROR_UNTERMINATED_REGEX 8
52+
#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META 9
53+
#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT 10
54+
#define MERVE_ERROR_UNEXPECTED_ESM_EXPORT 11
55+
#define MERVE_ERROR_TEMPLATE_NEST_OVERFLOW 12
56+
57+
#ifdef __cplusplus
58+
extern "C" {
59+
#endif
60+
61+
/**
62+
* Parse CommonJS source code and extract export information.
63+
*
64+
* The source buffer must remain valid while accessing string_view-backed
65+
* export names from the returned handle.
66+
*
67+
* You must call merve_free() on the returned handle when done.
68+
*
69+
* @param input Pointer to the JavaScript source (need not be null-terminated).
70+
* NULL is treated as an empty string.
71+
* @param length Length of the input in bytes.
72+
* @return A handle to the parse result, or NULL on out-of-memory.
73+
* Use merve_is_valid() to check if parsing succeeded.
74+
*/
75+
merve_analysis merve_parse_commonjs(const char* input, size_t length);
76+
77+
/**
78+
* Check whether the parse result is valid (parsing succeeded).
79+
*
80+
* @param result Handle returned by merve_parse_commonjs(). NULL returns false.
81+
* @return true if parsing succeeded, false otherwise.
82+
*/
83+
bool merve_is_valid(merve_analysis result);
84+
85+
/**
86+
* Free a parse result and all associated memory.
87+
*
88+
* @param result Handle returned by merve_parse_commonjs(). NULL is a no-op.
89+
*/
90+
void merve_free(merve_analysis result);
91+
92+
/**
93+
* Get the number of named exports found.
94+
*
95+
* @param result A parse result handle. NULL returns 0.
96+
* @return Number of exports, or 0 if result is NULL or invalid.
97+
*/
98+
size_t merve_get_exports_count(merve_analysis result);
99+
100+
/**
101+
* Get the number of re-export module specifiers found.
102+
*
103+
* @param result A parse result handle. NULL returns 0.
104+
* @return Number of re-exports, or 0 if result is NULL or invalid.
105+
*/
106+
size_t merve_get_reexports_count(merve_analysis result);
107+
108+
/**
109+
* Get the name of an export at the given index.
110+
*
111+
* @param result A valid parse result handle.
112+
* @param index Zero-based index (must be < merve_get_exports_count()).
113+
* @return Non-owning string reference. Returns {NULL, 0} on error.
114+
*/
115+
merve_string merve_get_export_name(merve_analysis result, size_t index);
116+
117+
/**
118+
* Get the 1-based source line number of an export.
119+
*
120+
* @param result A valid parse result handle.
121+
* @param index Zero-based index (must be < merve_get_exports_count()).
122+
* @return 1-based line number, or 0 on error.
123+
*/
124+
uint32_t merve_get_export_line(merve_analysis result, size_t index);
125+
126+
/**
127+
* Get the module specifier of a re-export at the given index.
128+
*
129+
* @param result A valid parse result handle.
130+
* @param index Zero-based index (must be < merve_get_reexports_count()).
131+
* @return Non-owning string reference. Returns {NULL, 0} on error.
132+
*/
133+
merve_string merve_get_reexport_name(merve_analysis result, size_t index);
134+
135+
/**
136+
* Get the 1-based source line number of a re-export.
137+
*
138+
* @param result A valid parse result handle.
139+
* @param index Zero-based index (must be < merve_get_reexports_count()).
140+
* @return 1-based line number, or 0 on error.
141+
*/
142+
uint32_t merve_get_reexport_line(merve_analysis result, size_t index);
143+
144+
/**
145+
* Get the error code from the last merve_parse_commonjs() call.
146+
*
147+
* @return One of the MERVE_ERROR_* constants, or -1 if the last parse
148+
* succeeded.
149+
* @note This is global state, overwritten by each merve_parse_commonjs() call.
150+
*/
151+
int merve_get_last_error(void);
152+
153+
/**
154+
* Get the merve library version string.
155+
*
156+
* @return Null-terminated version string (e.g. "1.0.1"). Never NULL.
157+
*/
158+
const char* merve_get_version(void);
159+
160+
/**
161+
* Get the merve library version as individual components.
162+
*
163+
* @return Struct with major, minor, and revision fields.
164+
*/
165+
merve_version_components merve_get_version_components(void);
166+
167+
#ifdef __cplusplus
168+
} /* extern "C" */
169+
#endif
170+
171+
#endif /* MERVE_C_H */

singleheader/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
set(SINGLEHEADER_FILES
55
${CMAKE_CURRENT_BINARY_DIR}/merve.cpp
66
${CMAKE_CURRENT_BINARY_DIR}/merve.h
7+
${CMAKE_CURRENT_BINARY_DIR}/merve_c.h
78
)
89
set_source_files_properties(${SINGLEHEADER_FILES} PROPERTIES GENERATED TRUE)
910

singleheader/amalgamate.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
AMALGAMATE_OUTPUT_PATH = os.environ["AMALGAMATE_OUTPUT_PATH"]
3535

3636
# this list excludes the "src/generic headers"
37-
ALLCFILES = ["parser.cpp"]
37+
ALLCFILES = ["parser.cpp", "merve_c.cpp"]
3838

3939
# order matters
4040
ALLCHEADERS = ["merve.h"]
@@ -138,11 +138,20 @@ def dofile(fid: str, prepath: str, filename: str) -> None:
138138

139139
amal_c.close()
140140

141+
# Copy merve_c.h to the output directory (it is already standalone).
142+
MERVE_C_H_SRC = os.path.join(AMALGAMATE_INCLUDE_PATH, "merve_c.h")
143+
MERVE_C_H_DST = os.path.join(AMALGAMATE_OUTPUT_PATH, "merve_c.h")
144+
if os.path.exists(MERVE_C_H_SRC):
145+
shutil.copy2(MERVE_C_H_SRC, MERVE_C_H_DST)
146+
print(f"Copied {MERVE_C_H_SRC} to {MERVE_C_H_DST}")
147+
141148
zf = zipfile.ZipFile(
142149
os.path.join(AMALGAMATE_OUTPUT_PATH, "singleheader.zip"), "w", zipfile.ZIP_DEFLATED
143150
)
144151
zf.write(os.path.join(AMALGAMATE_OUTPUT_PATH, OUTPUT_CPP), OUTPUT_CPP)
145152
zf.write(os.path.join(AMALGAMATE_OUTPUT_PATH, OUTPUT_H), OUTPUT_H)
153+
if os.path.exists(MERVE_C_H_DST):
154+
zf.write(MERVE_C_H_DST, "merve_c.h")
146155

147156

148157
print("Done with all files generation.")

src/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ message(STATUS "CMAKE_BUILD_TYPE : " ${CMAKE_BUILD_TYPE})
55
add_library(merve-include-source INTERFACE)
66
target_include_directories(merve-include-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
77
add_library(merve-source INTERFACE)
8-
target_sources(merve-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>/parser.cpp)
8+
# Wrap each complete path in BUILD_INTERFACE so the entries vanish entirely
# outside the build tree instead of collapsing to "/parser.cpp" and "/merve_c.cpp".
target_sources(merve-source INTERFACE
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/parser.cpp>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/merve_c.cpp>
)
99
target_link_libraries(merve-source INTERFACE merve-include-source)
10-
add_library(merve parser.cpp)
10+
add_library(merve parser.cpp merve_c.cpp)
1111
target_include_directories(merve PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> )
1212
target_include_directories(merve PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>")
1313

src/lexer.cpp

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)