Skip to content

Commit aabf044

Browse files
add starter codes and tests
1 parent 3321f32 commit aabf044

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+3990
-0
lines changed

.clang-format

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
BasedOnStyle: LLVM
3+
IndentWidth: 4 # Indentation width; LLVM default is 2, changed to 4
4+
AccessModifierOffset: -4 # Offset of public/protected/private access modifiers relative to members; paired with IndentWidth. LLVM default is -2
5+
AlignOperands: AlignAfterOperator # Alignment of binary-operator operands across lines; LLVM default is Align, changed so the operator wraps together with its operand
6+
BreakBeforeBinaryOperators: All # Break before binary operators; LLVM default is None, changed so a wrapped line always starts with the binary operator, including assignment (=)
7+
ColumnLimit: 0 # Column limit; LLVM default is 80, changed to 0 (no limit)
8+
AllowShortBlocksOnASingleLine: Always # Whether short blocks (single-statement blocks) may stay on one line; LLVM default is Never, changed to Always
9+
AllowShortLoopsOnASingleLine: true # Whether short loops may stay on one line; LLVM default is false, changed to true
10+
InsertBraces: true # Whether to insert braces after control statements (if/else/for/do/while); LLVM default is false, changed to true
11+
BreakBeforeBraces: Custom # Brace-wrapping style; LLVM default is LLVM, changed to Custom so that BraceWrapping takes effect
12+
BraceWrapping:
13+
AfterCaseLabel: false
14+
AfterClass: false
15+
AfterControlStatement: Never
16+
AfterEnum: false
17+
AfterFunction: false
18+
AfterNamespace: false
19+
AfterObjCDeclaration: false
20+
AfterStruct: false
21+
AfterUnion: false
22+
AfterExternBlock: false
23+
BeforeCatch: false
24+
BeforeElse: false
25+
BeforeLambdaBody: false
26+
BeforeWhile: false
27+
IndentBraces: false
28+
SplitEmptyFunction: true
29+
SplitEmptyRecord: true
30+
SplitEmptyNamespace: true

.github/workflows/build.yaml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
name: Build and test
2+
on:
3+
pull_request:
4+
push:
5+
paths-ignore:
6+
- '**.md'
7+
- 'LICENSE'
8+
9+
jobs:
10+
build:
11+
name: Build
12+
strategy:
13+
fail-fast: false
14+
matrix:
15+
os: [windows-latest, ubuntu-latest]
16+
type: [release]
17+
runs-on: ${{ matrix.os }}
18+
steps:
19+
20+
- name: checkout code
21+
uses: actions/checkout@v4
22+
23+
- name: install xmake
24+
uses: xmake-io/github-action-setup-xmake@v1
25+
with:
26+
xmake-version: latest
27+
28+
- name: Xmake Build & Install
29+
run: |
30+
xmake
31+
xmake install
32+
33+
- name: Install Python
34+
run: |
35+
cd python
36+
pip install .
37+
cd ..
38+
39+
- name: Assignment-0
40+
run: |
41+
python test/test_runtime.py --device cpu
42+
43+
- name: Assignment-1
44+
run: |
45+
python test/test_tensor.py
46+
47+
- name: Assignment-2
48+
run: |
49+
python test/ops/add.py
50+
python test/ops/argmax.py
51+
python test/ops/embedding.py
52+
python test/ops/linear.py
53+
python test/ops/rms_norm.py
54+
python test/ops/rope.py
55+
python test/ops/self_attention.py
56+
python test/ops/swiglu.py
57+
58+
- name: Assignment-3
59+
run: |
60+
python test/test_infer.py --test

include/llaisys.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#ifndef __LLAISYS_H__
2+
#define __LLAISYS_H__
3+
4+
#if defined(_WIN32)
5+
#define __export __declspec(dllexport)
6+
#elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
7+
#define __export __attribute__((visibility("default")))
8+
#else
9+
#define __export
10+
#endif
11+
12+
#ifdef __cplusplus
13+
#define __C extern "C"
14+
#include <cstddef>
15+
#include <cstdint>
16+
#else
17+
#define __C
18+
#include <stddef.h>
19+
#include <stdint.h>
20+
#endif
21+
22+
// Device Types
23+
typedef enum {
24+
LLAISYS_DEVICE_CPU = 0,
25+
// TODO: Add more device types here. Values must stay consecutive so that LLAISYS_DEVICE_TYPE_COUNT (the last enumerator) equals the number of device types.
26+
LLAISYS_DEVICE_NVIDIA = 1,
27+
LLAISYS_DEVICE_TYPE_COUNT
28+
} llaisysDeviceType_t;
29+
30+
// Data Types
31+
typedef enum {
32+
LLAISYS_DTYPE_INVALID = 0,
33+
LLAISYS_DTYPE_BYTE = 1,
34+
LLAISYS_DTYPE_BOOL = 2,
35+
LLAISYS_DTYPE_I8 = 3,
36+
LLAISYS_DTYPE_I16 = 4,
37+
LLAISYS_DTYPE_I32 = 5,
38+
LLAISYS_DTYPE_I64 = 6,
39+
LLAISYS_DTYPE_U8 = 7,
40+
LLAISYS_DTYPE_U16 = 8,
41+
LLAISYS_DTYPE_U32 = 9,
42+
LLAISYS_DTYPE_U64 = 10,
43+
LLAISYS_DTYPE_F8 = 11,
44+
LLAISYS_DTYPE_F16 = 12,
45+
LLAISYS_DTYPE_F32 = 13,
46+
LLAISYS_DTYPE_F64 = 14,
47+
LLAISYS_DTYPE_C16 = 15,
48+
LLAISYS_DTYPE_C32 = 16,
49+
LLAISYS_DTYPE_C64 = 17,
50+
LLAISYS_DTYPE_C128 = 18,
51+
LLAISYS_DTYPE_BF16 = 19,
52+
} llaisysDataType_t;
53+
54+
// Runtime Types
55+
// Stream
56+
typedef void *llaisysStream_t;
57+
58+
// Memory Copy Directions
59+
typedef enum {
60+
LLAISYS_MEMCPY_H2H = 0,
61+
LLAISYS_MEMCPY_H2D = 1,
62+
LLAISYS_MEMCPY_D2H = 2,
63+
LLAISYS_MEMCPY_D2D = 3,
64+
} llaisysMemcpyKind_t;
65+
66+
#endif // __LLAISYS_H__

include/llaisys/models/qwen2.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#ifndef LLAISYS_MODELS_QWEN2_H
2+
#define LLAISYS_MODELS_QWEN2_H
3+
4+
#include "../tensor.h"
5+
6+
__C {
7+
// Model description handed to llaisysQwen2ModelCreate.
// NOTE(review): the per-field notes below are inferred from the field names only — confirm against the implementation.
struct LlaisysQwen2Meta {
8+
llaisysDataType_t dtype;
9+
// presumably: layer count, hidden size, attention heads, key/value heads, per-head dim,
// MLP intermediate size, maximum sequence length, vocabulary size — TODO confirm
size_t nlayer, hs, nh, nkvh, dh, di, maxseq, voc;
10+
// presumably: normalization epsilon and RoPE theta — TODO confirm
float epsilon, theta;
11+
// presumably the token id that terminates generation — TODO confirm
int64_t end_token;
12+
};
13+
14+
// Tensor handles for the model's weights; llaisysQwen2ModelWeights returns a pointer to this struct.
// Members declared as `llaisysTensor_t *` point to tensor handles rather than being a handle themselves
// (presumably arrays with one entry per layer — TODO confirm length and ownership).
struct LlaisysQwen2Weights {
15+
llaisysTensor_t in_embed;
16+
llaisysTensor_t out_embed;
17+
llaisysTensor_t out_norm_w; // a.k.a. model.norm.weight
18+
llaisysTensor_t *attn_norm_w; // a.k.a. input_layernorm.weight
19+
llaisysTensor_t *attn_q_w;
20+
llaisysTensor_t *attn_q_b;
21+
llaisysTensor_t *attn_k_w;
22+
llaisysTensor_t *attn_k_b;
23+
llaisysTensor_t *attn_v_w;
24+
llaisysTensor_t *attn_v_b;
25+
llaisysTensor_t *attn_o_w;
26+
llaisysTensor_t *mlp_norm_w; // a.k.a. post_attention_layernorm.weight
27+
llaisysTensor_t *mlp_gate_w;
28+
llaisysTensor_t *mlp_up_w;
29+
llaisysTensor_t *mlp_down_w;
30+
};
31+
32+
struct LlaisysQwen2Model;
33+
34+
// Takes the model description by const pointer plus a device type and a list of `ndevice` device ids,
// and returns a pointer to the opaque LlaisysQwen2Model.
// The explicit `struct` on the meta parameter matches the other declarations in this header:
// C requires it to name a struct tag, and C++ treats both spellings as the same type.
__export struct LlaisysQwen2Model *llaisysQwen2ModelCreate(const struct LlaisysQwen2Meta *meta, llaisysDeviceType_t device, int *device_ids, int ndevice);
35+
36+
__export void llaisysQwen2ModelDestroy(struct LlaisysQwen2Model * model);
37+
38+
__export struct LlaisysQwen2Weights *llaisysQwen2ModelWeights(struct LlaisysQwen2Model * model);
39+
40+
__export int64_t llaisysQwen2ModelInfer(struct LlaisysQwen2Model * model, int64_t * token_ids, size_t ntoken);
41+
}
42+
#endif // LLAISYS_MODELS_QWEN2_H

include/llaisys/ops.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef LLAISYS_OPS_H
2+
#define LLAISYS_OPS_H
3+
4+
#include "tensor.h"
5+
6+
__C {
7+
__export void llaisysAdd(llaisysTensor_t c, llaisysTensor_t a, llaisysTensor_t b);
8+
__export void llaisysArgmax(llaisysTensor_t max_idx, llaisysTensor_t max_val, llaisysTensor_t vals);
9+
__export void llaisysEmbedding(llaisysTensor_t out, llaisysTensor_t index, llaisysTensor_t weight);
10+
__export void llaisysLinear(llaisysTensor_t out, llaisysTensor_t in, llaisysTensor_t weight, llaisysTensor_t bias);
11+
__export void llaisysRearrange(llaisysTensor_t out, llaisysTensor_t in);
12+
__export void llaisysRmsNorm(llaisysTensor_t out, llaisysTensor_t in, llaisysTensor_t weight, float eps);
13+
__export void llaisysROPE(llaisysTensor_t out, llaisysTensor_t in, llaisysTensor_t pos_ids, float theta);
14+
__export void llaisysSelfAttention(llaisysTensor_t attn_val, llaisysTensor_t q, llaisysTensor_t k, llaisysTensor_t v, float scale);
15+
__export void llaisysSwiGLU(llaisysTensor_t out, llaisysTensor_t gate, llaisysTensor_t up);
16+
}
17+
18+
#endif // LLAISYS_OPS_H

include/llaisys/runtime.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#ifndef LLAISYS_RUNTIME_H
2+
#define LLAISYS_RUNTIME_H
3+
4+
#include "../llaisys.h"
5+
6+
__C {
7+
// Runtime API Functions
8+
// Device
9+
// Pointer to a function that takes no arguments and returns an int.
// `(void)` makes this a real prototype in C; an empty `()` leaves the parameter list
// unspecified before C23. In C++ both spellings denote the same type.
typedef int (*get_device_count_api)(void);
10+
typedef void (*set_device_api)(int);
11+
// Pointer to a function that takes no arguments and returns nothing.
// `(void)` makes this a real prototype in C; an empty `()` leaves the parameter list
// unspecified before C23. In C++ both spellings denote the same type.
typedef void (*device_synchronize_api)(void);
12+
// Stream
13+
// Pointer to a function that takes no arguments and returns a llaisysStream_t handle.
// `(void)` makes this a real prototype in C; an empty `()` leaves the parameter list
// unspecified before C23. In C++ both spellings denote the same type.
typedef llaisysStream_t (*create_stream_api)(void);
14+
typedef void (*destroy_stream_api)(llaisysStream_t);
15+
typedef void (*stream_synchronize_api)(llaisysStream_t);
16+
// Memory
17+
typedef void *(*malloc_device_api)(size_t);
18+
typedef void (*free_device_api)(void *);
19+
typedef void *(*malloc_host_api)(size_t);
20+
typedef void (*free_host_api)(void *);
21+
// Memory copy
22+
typedef void (*memcpy_sync_api)(void *, const void *, size_t, llaisysMemcpyKind_t);
23+
typedef void (*memcpy_async_api)(void *, const void *, size_t, llaisysMemcpyKind_t, llaisysStream_t);
24+
25+
// Table of runtime entry points: one function pointer per API typedef declared in this header.
// llaisysGetRuntimeAPI returns a const pointer to such a table for a given device type.
struct LlaisysRuntimeAPI {
26+
// Device
get_device_count_api get_device_count;
27+
set_device_api set_device;
28+
device_synchronize_api device_synchronize;
29+
// Stream
create_stream_api create_stream;
30+
destroy_stream_api destroy_stream;
31+
stream_synchronize_api stream_synchronize;
32+
// Memory
malloc_device_api malloc_device;
33+
free_device_api free_device;
34+
malloc_host_api malloc_host;
35+
free_host_api free_host;
36+
// Memory copy
memcpy_sync_api memcpy_sync;
37+
memcpy_async_api memcpy_async;
38+
};
39+
40+
// Llaisys API for getting the runtime APIs
41+
// Returns a read-only pointer to the LlaisysRuntimeAPI table for the given device type.
// The explicit `struct` is required by C to name a struct tag; C++ treats both spellings as the same type.
__export const struct LlaisysRuntimeAPI *llaisysGetRuntimeAPI(llaisysDeviceType_t);
42+
43+
// Llaisys API for switching device context
44+
__export void llaisysSetContextRuntime(llaisysDeviceType_t, int);
45+
}
46+
47+
#endif // LLAISYS_RUNTIME_H

include/llaisys/tensor.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#ifndef LLAISYS_TENSOR_H
2+
#define LLAISYS_TENSOR_H
3+
4+
#include "../llaisys.h"
5+
6+
__C {
7+
typedef struct LlaisysTensor *llaisysTensor_t;
8+
9+
__export llaisysTensor_t tensorCreate(
10+
size_t * shape,
11+
size_t ndim,
12+
llaisysDataType_t dtype,
13+
llaisysDeviceType_t device_type,
14+
int device_id);
15+
16+
__export void tensorDestroy(
17+
llaisysTensor_t tensor);
18+
19+
__export void *tensorGetData(
20+
llaisysTensor_t tensor);
21+
22+
__export size_t tensorGetNdim(
23+
llaisysTensor_t tensor);
24+
25+
__export void tensorGetShape(
26+
llaisysTensor_t tensor,
27+
size_t * shape);
28+
29+
__export void tensorGetStrides(
30+
llaisysTensor_t tensor,
31+
ptrdiff_t * strides);
32+
33+
__export llaisysDataType_t tensorGetDataType(
34+
llaisysTensor_t tensor);
35+
36+
__export llaisysDeviceType_t tensorGetDeviceType(
37+
llaisysTensor_t tensor);
38+
39+
__export int tensorGetDeviceId(
40+
llaisysTensor_t tensor);
41+
42+
__export void tensorDebug(
43+
llaisysTensor_t tensor);
44+
45+
__export uint8_t tensorIsContiguous(
46+
llaisysTensor_t tensor);
47+
48+
__export void tensorLoad(
49+
llaisysTensor_t tensor,
50+
const void *data);
51+
52+
__export llaisysTensor_t tensorView(
53+
llaisysTensor_t tensor,
54+
size_t * shape,
55+
size_t ndim);
56+
57+
__export llaisysTensor_t tensorPermute(
58+
llaisysTensor_t tensor,
59+
size_t * order);
60+
61+
__export llaisysTensor_t tensorSlice(
62+
llaisysTensor_t tensor,
63+
size_t dim,
64+
size_t start,
65+
size_t end);
66+
}
67+
68+
#endif // LLAISYS_TENSOR_H

python/llaisys/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from .runtime import RuntimeAPI
2+
from .libllaisys import DeviceType
3+
from .libllaisys import DataType
4+
from .libllaisys import MemcpyKind
5+
from .libllaisys import llaisysStream_t as Stream
6+
from .tensor import Tensor
7+
from .ops import Ops
8+
from . import models
9+
from .models import *
10+
11+
__all__ = [
12+
"RuntimeAPI",
13+
"DeviceType",
14+
"DataType",
15+
"MemcpyKind",
16+
"Stream",
17+
"Tensor",
18+
"Ops",
19+
"models",
20+
]

0 commit comments

Comments
 (0)