|
| 1 | +# 向量数据库 |
| 2 | + |
| 3 | +同样地,ChatLuna 也支持接入向量数据库。 |
| 4 | + |
| 5 | +## 注册插件 |
| 6 | + |
| 7 | +所有需要向 ChatLuna 接入功能的插件,都需要新建 `ChatLunaPlugin` 实例,并将其注册到 `ChatLuna` 服务中。 |
| 8 | + |
| 9 | +```typescript |
| 10 | +import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' |
| 11 | +import { Context, Schema } from 'koishi' |
| 12 | + |
| 13 | +export function apply(ctx: Context, config: Config) { |
| 14 | + const plugin = new ChatLunaPlugin(ctx, config, 'your-plugin-name') |
| 15 | + |
| 16 | + ctx.on('ready', async () => { |
| 17 | + // 在 ready 事件中注册到 ChatLuna 服务 |
| 18 | + plugin.registerToService() |
| 19 | + |
| 20 | + // 继续... |
| 21 | + }) |
| 22 | +} |
| 23 | +``` |
| 24 | + |
| 25 | +## 配置 Schema |
| 26 | + |
| 27 | +如果你的向量数据库需要连接 URL 等参数,则需要自行声明 Schema。 |
| 28 | + |
| 29 | +```typescript |
| 30 | +import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' |
| 31 | +import { Context, Schema } from 'koishi' |
| 32 | + |
| 33 | +export interface Config extends ChatLunaPlugin.Config { |
| 34 | + milvusUrl: string |
| 35 | + milvusUsername: string |
| 36 | + milvusPassword: string |
| 37 | +} |
| 38 | + |
| 39 | +export const Config: Schema<Config> = Schema.intersect([ |
| 40 | + Schema.object({ |
| 41 | + milvusUrl: Schema.string() |
| 42 | + .role('url') |
| 43 | + .default('http://127.0.0.1:19530'), |
| 44 | + milvusUsername: Schema.string().default(''), |
| 45 | + milvusPassword: Schema.string().role('secret').default('') |
| 46 | + }) |
| 47 | +]) as any |
| 48 | +``` |
| 49 | + |
| 50 | +例如上面的 Schema 中,就声明了 Milvus 的连接 URL、用户名和密码。 |
| 51 | + |
| 52 | +## 注册向量数据库 |
| 53 | + |
| 54 | +注册向量数据库非常简单,只需调用 `ChatLunaPlugin` 实例的 `registerVectorStore` 方法即可。 |
| 55 | + |
| 56 | +以 Milvus 为例,代码如下: |
| 57 | + |
| 58 | +```typescript |
| 59 | +const MilvusClass = await importMilvus() |
| 60 | + |
| 61 | +plugin.registerVectorStore('milvus', async (params) => { |
| 62 | + const embeddings = params.embeddings |
| 63 | + |
| 64 | + const vectorStore = new MilvusClass(embeddings, { |
| 65 | + collectionName: 'chatluna_collection', |
| 66 | + partitionName: params.key ?? 'chatluna', |
| 67 | + url: config.milvusUrl, |
| 68 | + username: config.milvusUsername, |
| 69 | + password: config.milvusPassword, |
| 70 | + textFieldMaxLength: 3000 |
| 71 | + }) |
| 72 | + |
| 73 | + const testVector = await embeddings.embedDocuments(['test']) |
| 74 | + |
| 75 | + try { |
| 76 | + await vectorStore.ensureCollection(testVector, [ |
| 77 | + { |
| 78 | + pageContent: 'test', |
| 79 | + metadata: {} |
| 80 | + } |
| 81 | + ]) |
| 82 | + |
| 83 | + await vectorStore.ensurePartition() |
| 84 | + |
| 85 | + await vectorStore.similaritySearchVectorWithScore(testVector[0], 10) |
| 86 | + } catch (e) { |
| 87 | + try { |
| 88 | + await vectorStore.client.releasePartitions({ |
| 89 | + collection_name: 'chatluna_collection', |
| 90 | + partition_names: [params.key ?? 'chatluna'] |
| 91 | + }) |
| 92 | + |
| 93 | + await vectorStore.client.releaseCollection({ |
| 94 | + collection_name: 'chatluna_collection' |
| 95 | + }) |
| 96 | + |
| 97 | + await vectorStore.client.dropPartition({ |
| 98 | + collection_name: 'chatluna_collection', |
| 99 | + partition_name: params.key ?? 'chatluna' |
| 100 | + }) |
| 101 | + |
| 102 | + await vectorStore.client.dropCollection({ |
| 103 | + collection_name: 'chatluna_collection' |
| 104 | + }) |
| 105 | + |
| 106 | + await vectorStore.ensureCollection(testVector, [ |
| 107 | + { |
| 108 | + pageContent: 'test', |
| 109 | + metadata: {} |
| 110 | + } |
| 111 | + ]) |
| 112 | + |
| 113 | + await vectorStore.ensurePartition() |
| 114 | + } catch (e) { |
| 115 | + logger.error(e) |
| 116 | + } |
| 117 | + logger.error(e) |
| 118 | + } |
| 119 | + |
| 120 | + const wrapperStore = new ChatLunaSaveableVectorStore<Milvus>( |
| 121 | + vectorStore, |
| 122 | + { |
| 123 | + async deletableFunction(store, options) { |
| 124 | + if (options.deleteAll) { |
| 125 | + await vectorStore.client.releasePartitions({ |
| 126 | + collection_name: 'chatluna_collection', |
| 127 | + partition_names: [params.key ?? 'chatluna'] |
| 128 | + }) |
| 129 | + |
| 130 | + await vectorStore.client.releaseCollection({ |
| 131 | + collection_name: 'chatluna_collection' |
| 132 | + }) |
| 133 | + |
| 134 | + await vectorStore.client.dropPartition({ |
| 135 | + collection_name: 'chatluna_collection', |
| 136 | + partition_name: params.key ?? 'chatluna' |
| 137 | + }) |
| 138 | + |
| 139 | + await vectorStore.client.dropCollection({ |
| 140 | + collection_name: 'chatluna_collection' |
| 141 | + }) |
| 142 | + |
| 143 | + return |
| 144 | + } |
| 145 | + |
| 146 | + const ids: string[] = [] |
| 147 | + if (options.ids) { |
| 148 | + ids.push(...options.ids) |
| 149 | + } |
| 150 | + |
| 151 | + if (options.documents) { |
| 152 | + const documentIds = options.documents |
| 153 | + ?.map((document) => { |
| 154 | + return document.metadata?.raw_id as |
| 155 | + | string |
| 156 | + | undefined |
| 157 | + }) |
| 158 | + .filter((id): id is string => id != null) |
| 159 | + |
| 160 | + ids.push(...documentIds) |
| 161 | + } |
| 162 | + |
| 163 | + if (ids.length < 1) { |
| 164 | + return |
| 165 | + } |
| 166 | + |
| 167 | + await store.delete({ |
| 168 | + ids |
| 169 | + }) |
| 170 | + }, |
| 171 | + async addDocumentsFunction(store, documents, options) { |
| 172 | + let ids = options?.ids ?? [] |
| 173 | + |
| 174 | + ids = documents.map((document, i) => { |
| 175 | + const id = ids[i] ?? crypto.randomUUID() |
| 176 | + |
| 177 | + document.metadata = { |
| 178 | + ...document.metadata, |
| 179 | + raw_id: id |
| 180 | + } |
| 181 | + |
| 182 | + return id |
| 183 | + }) |
| 184 | + |
| 185 | + await store.addDocuments(documents, { |
| 186 | + ids |
| 187 | + }) |
| 188 | + } |
| 189 | + } |
| 190 | + ) |
| 191 | + |
| 192 | + return wrapperStore |
| 193 | +}) |
| 194 | +``` |
| 195 | + |
| 196 | +在实现时需要注意以下几点: |
| 197 | + |
| 198 | +1. `registerVectorStore` 方法的第一个参数是向量数据库的名称,ChatLuna 会根据这个名称来调用对应的向量数据库。 |
| 199 | +2. `params` 参数中,`embeddings` 是 Embeddings 实例,`key` 是数据库实例的唯一标识,可以用于区分不同的数据库实例。 |
| 200 | +3. 返回值是一个 `ChatLunaSaveableVectorStore` 实例,ChatLuna 会通过这个实例来保存和读取数据。 |
| 201 | +4. 需要自行实现 `ChatLunaSaveableVectorStore` 的 `deletableFunction` 和 `addDocumentsFunction` 方法,分别用于支持向量数据库的删除和保存功能。 |
| 202 | + |
| 203 | +## 资源参考 |
| 204 | + |
| 205 | +请参考 ChatLuna 官方的向量数据库服务插件 [chatluna-vector-store](https://github.com/ChatLunaLab/chatluna/blob/v1-dev/packages/vector-store-service)。 |
0 commit comments