tenseleyflow/documentlanguagemodel / 11ca4a1

Browse files

feat(vendor): add llama.cpp submodule pinned at b8816 + refresh pre-tokenizer labels

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
11ca4a1312e8e81d1acd15ca29737c5aa3bf833e
Parents
6443ebd
Tree
a7bb355

4 changed files

Status  File  +  -
A .gitmodules 3 0
M pyproject.toml 2 0
A vendor/llama.cpp 1 0
A vendor/llama_cpp_pretokenizer_hashes.json 76 0
.gitmodules (added)
@@ -0,0 +1,3 @@
1
+[submodule "vendor/llama.cpp"]
2
+	path = vendor/llama.cpp
3
+	url = https://github.com/ggml-org/llama.cpp
pyproject.toml (modified)
@@ -64,6 +64,8 @@ packages = ["src/dlm"]
6464
 line-length = 100
6565
 target-version = "py311"
6666
 src = ["src", "tests"]
67
+# Vendored third-party code (llama.cpp) — don't lint or format it.
68
+extend-exclude = ["vendor/llama.cpp"]
6769
 
6870
 [tool.ruff.lint]
6971
 select = [
vendor/llama.cpp (added)
@@ -0,0 +1 @@
1
+Subproject commit 3f7c29d318e317b63f54c558bc69803963d7d88c
vendor/llama_cpp_pretokenizer_hashes.json (added)
@@ -0,0 +1,76 @@
1
+[
2
+  "a.x-4.0",
3
+  "afmoe",
4
+  "bailingmoe",
5
+  "bailingmoe2",
6
+  "bert-bge",
7
+  "bert-bge-large",
8
+  "bloom",
9
+  "chameleon",
10
+  "chatglm-bpe",
11
+  "codeshell",
12
+  "command-r",
13
+  "dbrx",
14
+  "deepseek-coder",
15
+  "deepseek-llm",
16
+  "deepseek-r1-qwen",
17
+  "deepseek-v3",
18
+  "exaone",
19
+  "exaone-moe",
20
+  "exaone4",
21
+  "f2llmv2",
22
+  "falcon",
23
+  "falcon-h1",
24
+  "falcon3",
25
+  "gigachat",
26
+  "glm4",
27
+  "gpt-2",
28
+  "gpt-4o",
29
+  "gpt3-finnish",
30
+  "granite-docling",
31
+  "grok-2",
32
+  "hunyuan",
33
+  "hunyuan-dense",
34
+  "jais",
35
+  "jais-2",
36
+  "jina-v1-en",
37
+  "jina-v2-code",
38
+  "jina-v2-de",
39
+  "jina-v2-en",
40
+  "jina-v2-es",
41
+  "jina-v5-nano",
42
+  "joyai-llm",
43
+  "kanana2",
44
+  "kimi-k2",
45
+  "kormo",
46
+  "lfm2",
47
+  "llama-bpe",
48
+  "llama4",
49
+  "megrez",
50
+  "mellum",
51
+  "midm-2.0",
52
+  "minerva-7b",
53
+  "minimax-m2",
54
+  "modern-bert",
55
+  "mpt",
56
+  "olmo",
57
+  "phi-2",
58
+  "pixtral",
59
+  "poro-chat",
60
+  "qwen2",
61
+  "qwen35",
62
+  "refact",
63
+  "roberta-bpe",
64
+  "seed-coder",
65
+  "smaug-bpe",
66
+  "smollm",
67
+  "solar-open",
68
+  "stablelm2",
69
+  "starcoder",
70
+  "superbpe",
71
+  "tekken",
72
+  "tiny_aya",
73
+  "trillion",
74
+  "viking",
75
+  "youtu"
76
+]