Reviewed copy of the Weaviate hybrid-search patch, restored to one diff line per source line.

Review notes:
  * Generic type arguments missing from the flattened text (for example "Omit, 'k'>") are
    restored by inference; confirm them against the original commit.
  * Indentation is reconstructed at 4 spaces per level; apply with whitespace tolerance
    if a context line does not match.
  * The index hashes are those of the original patch and no longer match the edited blobs.

diff --git a/packages/components/nodes/retrievers/WeaviateRetriever/HybridSearchRetriever.ts b/packages/components/nodes/retrievers/WeaviateRetriever/HybridSearchRetriever.ts
new file mode 100644
index 00000000000..cedabb26ed0
--- /dev/null
+++ b/packages/components/nodes/retrievers/WeaviateRetriever/HybridSearchRetriever.ts
@@ -0,0 +1,144 @@
+import { WeaviateStore } from '@langchain/weaviate'
+import { VectorStoreRetriever, VectorStoreRetrieverInput } from '@langchain/core/vectorstores'
+import { Document } from '@langchain/core/documents'
+import { get } from 'lodash'
+
+/** Constructor options for HybridSearchRetriever. */
+type WeaviateHybridInput<V extends WeaviateStore> = Omit<VectorStoreRetrieverInput<V>, 'k'> & {
+    /** Forwarded to the store's hybridSearch as `alpha`. */
+    alpha: number
+    /** Forwarded to the store's hybridSearch as `limit`. */
+    topK: number
+    /** Optional template; {{context}} and {{metadata.<path>}} are substituted per document. */
+    resultFormat?: string
+    /** Forwarded as `fusionType`; defaults to 'RelativeScore'. */
+    fusionType?: string
+}
+
+/**
+ * Retriever that delegates to the vector store's hybridSearch and, when a
+ * result template is configured, rewrites each document's pageContent with it.
+ */
+export class HybridSearchRetriever<V extends WeaviateStore> extends VectorStoreRetriever<V> {
+    resultFormat?: string
+    alpha: number
+    topK: number
+    fusionType: string
+
+    constructor(input: WeaviateHybridInput<V>) {
+        super(input)
+        this.vectorStore = input.vectorStore
+        this.alpha = input.alpha
+        this.topK = input.topK
+        this.fusionType = input.fusionType ?? 'RelativeScore'
+        // Must be assigned here: without it the template branch below can never run.
+        this.resultFormat = input.resultFormat
+    }
+
+    /**
+     * Runs a hybrid search for `query` and returns the hits, formatted through
+     * `resultFormat` when a non-empty template is set.
+     */
+    async _getRelevantDocuments(query: string): Promise<Document[]> {
+        const results = await this.vectorStore.hybridSearch(query, {
+            limit: this.topK,
+            alpha: this.alpha,
+            fusionType: this.fusionType,
+            filters: this.filter
+        })
+
+        // An empty template would blank every document, so treat it as "no formatting".
+        const template = this.resultFormat
+        if (!template) return results
+
+        return results.map((doc) => {
+            // Function replacer so `$&`, `$1`, `$$` inside pageContent are inserted literally.
+            const withContext = template.replace(/{{context}}/g, () => doc.pageContent)
+            return new Document({
+                pageContent: replaceMetadata(withContext, doc.metadata),
+                metadata: doc.metadata
+            })
+        })
+    }
+
+    /** Builds a retriever from a store plus the remaining constructor options. */
+    static fromVectorStore<V extends WeaviateStore>(vectorStore: V, options: Omit<WeaviateHybridInput<V>, 'vectorStore'>) {
+        return new this<V>({ ...options, vectorStore })
+    }
+}
+
+/**
+ * Replaces every {{metadata.<path>}} placeholder with the value found at that
+ * lodash path in `metadata`; placeholders with no value are left untouched.
+ */
+function replaceMetadata(template: string, metadata: Record<string, any>): string {
+    const metadataRegex = /{{metadata\.([\w.]+)}}/g
+    return template.replace(metadataRegex, (match, path) => {
+        const value = get(metadata, path)
+        return value !== undefined ? String(value) : match
+    })
+}
+
+/**
+ * Converts a declarative filter object into a filter built from
+ * `client.collections.get(indexName).filter`.
+ *
+ * Accepts either the filter itself or an object wrapping it under `where`.
+ * 'And' / 'Or' nodes are converted recursively over `operands`; leaf nodes need
+ * `path`, `operator` and one of the `value*` fields.
+ *
+ * @returns the built filter, or undefined when the input is empty, has no
+ * convertible operands, or uses an operator that is not in the map below.
+ */
+export const processSearchFilter = (filterInput: any, client: any, indexName: string) => {
+    if (!filterInput) return undefined
+    const rawFilter = filterInput?.where ?? filterInput
+
+    if (rawFilter.operator === 'And' || rawFilter.operator === 'Or') {
+        const subFilters = rawFilter.operands?.map((operand: any) => processSearchFilter(operand, client, indexName)).filter(Boolean)
+
+        if (!subFilters?.length) return undefined
+
+        return rawFilter.operator === 'And'
+            ? subFilters.reduce((acc: any, f: any) => acc.and(f))
+            : subFilters.reduce((acc: any, f: any) => acc.or(f))
+    }
+
+    if (rawFilter?.path && rawFilter?.operator) {
+        // Only the first path segment is used as the property name.
+        const propName = Array.isArray(rawFilter.path) ? rawFilter.path[0] : rawFilter.path
+        const operator = rawFilter.operator
+        // First defined value* field wins (?? keeps 0, '' and false).
+        const propValue =
+            rawFilter.valueText ??
+            rawFilter.valueString ??
+            rawFilter.valueInt ??
+            rawFilter.valueNumber ??
+            rawFilter.valueBoolean ??
+            rawFilter.valueDate ??
+            rawFilter.valueTextArray ??
+            rawFilter.valueStringArray ??
+            rawFilter.valueIntArray ??
+            rawFilter.valueNumberArray ??
+            rawFilter.valueBooleanArray ??
+            rawFilter.valueDateArray
+
+        const filter = client.collections.get(indexName).filter.byProperty(propName)
+
+        const operatorMap: Record<string, (v: any) => any> = {
+            Equal: (v) => filter.equal(v),
+            NotEqual: (v) => filter.notEqual(v),
+            GreaterThan: (v) => filter.greaterThan(v),
+            GreaterThanEqual: (v) => filter.greaterOrEqual(v),
+            LessThan: (v) => filter.lessThan(v),
+            LessThanEqual: (v) => filter.lessOrEqual(v),
+            Like: (v) => filter.like(v),
+            ContainsAny: (v) => filter.containsAny(v),
+            ContainsAll: (v) => filter.containsAll(v)
+        }
+
+        return operatorMap[operator]?.(propValue)
+    }
+
+    return undefined
+}
diff --git a/packages/components/nodes/retrievers/WeaviateRetriever/WeaviateRetriever.ts b/packages/components/nodes/retrievers/WeaviateRetriever/WeaviateRetriever.ts
new file mode 100644
index 00000000000..d08a3b4f551
--- /dev/null
+++ b/packages/components/nodes/retrievers/WeaviateRetriever/WeaviateRetriever.ts
@@ -0,0 +1,155 @@
+import { WeaviateStore } from '@langchain/weaviate'
+import { INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
+import { handleEscapeCharacters } from '../../../src'
+import { HybridSearchRetriever } from './HybridSearchRetriever'
+
+const defaultReturnFormat = '{{context}}\nSource: {{metadata.source}}'
+
+/**
+ * Flowise node that exposes Weaviate hybrid search as a retriever, as the
+ * retrieved documents, or as their concatenated text.
+ */
+class WeaviateRetriever_Retrievers implements INode {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    outputs: INodeOutputsValue[]
+
+    constructor() {
+        this.label = 'Weaviate Retriever'
+        this.name = 'weaviateRetriever'
+        this.version = 1.0
+        this.type = 'WeaviateRetriever'
+        this.icon = 'weaviateRetriever.png'
+        this.category = 'Retrievers'
+        this.description = 'Weaviate hybrid search combining vector similarity and BM25 keyword search'
+        this.baseClasses = [this.type, 'BaseRetriever']
+        this.inputs = [
+            {
+                label: 'Weaviate Vector Store',
+                name: 'vectorStore',
+                type: 'VectorStore'
+            },
+            {
+                label: 'Query',
+                name: 'query',
+                type: 'string',
+                description: 'Query to retrieve documents from retriever. If not specified, user question will be used',
+                optional: true,
+                acceptVariable: true
+            },
+            {
+                label: 'Result Format',
+                name: 'resultFormat',
+                type: 'string',
+                rows: 4,
+                description:
+                    'Format to return the results in. Use {{context}} to insert the pageContent of the document and {{metadata.key}} to insert metadata values.',
+                default: defaultReturnFormat
+            },
+            {
+                label: 'Alpha',
+                name: 'alpha',
+                type: 'number',
+                description:
+                    'Number between 0 and 1 that determines the weighting of keyword (BM25) portion of the hybrid search. A value of 1 is a pure vector search, while 0 is a pure keyword search.',
+                default: 0.5,
+                step: 0.1,
+                optional: true
+            },
+            {
+                label: 'Top K',
+                name: 'topK',
+                description: 'Number of top results to fetch. Default to vector store topK',
+                placeholder: '4',
+                type: 'number',
+                optional: true
+            },
+            {
+                label: 'fusionType',
+                name: 'fusionType',
+                type: 'options',
+                default: 'RelativeScore',
+                description:
+                    "Method to merge results: 'Ranked' combines by document rank, while 'RelativeScore' combines by normalized scores.",
+                options: [
+                    {
+                        label: 'RelativeScore',
+                        name: 'RelativeScore'
+                    },
+                    {
+                        label: 'Ranked',
+                        name: 'Ranked'
+                    }
+                ],
+                optional: true
+            }
+        ]
+        this.outputs = [
+            {
+                label: 'Weaviate Retriever',
+                name: 'retriever',
+                baseClasses: this.baseClasses
+            },
+            {
+                label: 'Document',
+                name: 'document',
+                description: 'Array of document objects containing metadata and pageContent',
+                baseClasses: ['Document', 'json']
+            },
+            {
+                label: 'Text',
+                name: 'text',
+                description: 'Concatenated string from pageContent of documents',
+                baseClasses: ['string', 'json']
+            }
+        ]
+    }
+
+    /**
+     * Builds the hybrid retriever and returns it, its documents, or their joined
+     * pageContent, depending on the selected output.
+     */
+    async init(nodeData: INodeData, input: string): Promise<any> {
+        const vectorStore = nodeData.inputs?.vectorStore as WeaviateStore
+        const query = nodeData.inputs?.query as string
+        const topK = nodeData.inputs?.topK as string
+        const alpha = nodeData.inputs?.alpha as string
+        const resultFormat = nodeData.inputs?.resultFormat as string
+        const fusionType = nodeData.inputs?.fusionType as string
+        const output = nodeData.outputs?.output as string
+
+        // Parse via String() so a numeric 0 (pure keyword search) is not mistaken for "unset".
+        const parsedAlpha = parseFloat(String(alpha ?? ''))
+
+        const retriever = HybridSearchRetriever.fromVectorStore(vectorStore, {
+            resultFormat,
+            alpha: Number.isNaN(parsedAlpha) ? 0.5 : parsedAlpha,
+            // The 'Top K' input promises the vector store's topK as default; 4 is the last resort.
+            topK: topK ? parseInt(topK, 10) : (vectorStore as any)?.k ?? 4,
+            // Reuse a filter attached to the store, if there is one.
+            filter: (vectorStore as any)?.filter,
+            fusionType: fusionType ?? 'RelativeScore'
+        })
+
+        const searchPath = query ? query : input
+
+        if (output === 'retriever') return retriever
+        else if (output === 'document') return await retriever._getRelevantDocuments(searchPath)
+        else if (output === 'text') {
+            const docs = await retriever._getRelevantDocuments(searchPath)
+            const finaltext = docs.map((doc) => doc.pageContent).join('\n')
+            return handleEscapeCharacters(finaltext, false)
+        }
+
+        return retriever
+    }
+}
+
+module.exports = { nodeClass: WeaviateRetriever_Retrievers }
diff --git a/packages/components/nodes/retrievers/WeaviateRetriever/weaviateRetriever.png b/packages/components/nodes/retrievers/WeaviateRetriever/weaviateRetriever.png
new file mode 100644
index 00000000000..25a39e33894
Binary files /dev/null and b/packages/components/nodes/retrievers/WeaviateRetriever/weaviateRetriever.png differ
diff --git a/packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts b/packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
index 61ac1dfaa1f..0973bf3fa52 100644
--- a/packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
+++ b/packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
@@ -7,6 +7,7 @@ import { getBaseClasses, resolveFlowObjValue, parseWithTypeConversion } from '..
 import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
 import { RunnableConfig } from '@langchain/core/runnables'
 import { VectorStoreRetriever } from '@langchain/core/vectorstores'
+import { processSearchFilter } from '../../retrievers/WeaviateRetriever/HybridSearchRetriever'
 
 const howToUse = `Add additional filters to vector store. You can also filter with flow config, including the current "state":
 - \`$flow.sessionId\`
@@ -203,7 +204,17 @@ class Retriever_Tools implements INode {
 
                 if (newMetadataFilter && typeof newMetadataFilter === 'object' && Object.keys(newMetadataFilter).length > 0) {
                     const vectorStore = (retriever as VectorStoreRetriever<any>).vectorStore
-                    vectorStore.filter = newMetadataFilter
+                    if (vectorStore.constructor.name === 'WeaviateStore' || vectorStore.lc_namespace?.includes('weaviate')) {
+                        const client = (vectorStore as any).client
+                        const indexName = (vectorStore as any).indexName
+                        if (client && indexName) {
+                            const newWeaviateMetadataFilter = processSearchFilter(newMetadataFilter, client, indexName)
+                            const weaviateRetriever = retriever as VectorStoreRetriever<any>
+                            weaviateRetriever.filter = newWeaviateMetadataFilter
+                        }
+                    } else {
+                        vectorStore.filter = newMetadataFilter
+                    }
                 }
             }
             const docs = await retriever.invoke(input)
diff --git a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts
index ac88610038e..2e7e8285c75 100644
--- a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts
+++ b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts
@@ -5,9 +5,8 @@ import { Document } from '@langchain/core/documents'
 import { Embeddings } from '@langchain/core/embeddings'
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
 import { getBaseClasses, getCredentialData, getCredentialParam, normalizeKeysRecursively, parseJsonBody } from '../../../src/utils'
-import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
 import { index } from '../../../src/indexing'
-import { VectorStore } from '@langchain/core/vectorstores'
+import { HybridSearchRetriever, processSearchFilter } from '../../retrievers/WeaviateRetriever/HybridSearchRetriever'
 
 /**
  * Parses a host string into host and optional port.
@@ -231,19 +230,92 @@ class Weaviate_VectorStores implements INode {
                 additionalParams: true,
                 optional: true,
                 acceptVariable: true
+            },
+            {
+                label: 'Search Type',
+                name: 'searchType',
+                type: 'options',
+                default: 'similarity',
+                options: [
+                    {
+                        label: 'Similarity',
+                        name: 'similarity'
+                    },
+                    {
+                        label: 'Max Marginal Relevance',
+                        name: 'mmr'
+                    },
+                    {
+                        label: 'Hybrid Search',
+                        name: 'hybrid'
+                    }
+                ],
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Fetch K',
+                name: 'fetchK',
+                description: 'Number of initial documents to fetch for MMR reranking. Default to 20. Used only when the search type is MMR',
+                placeholder: '20',
+                type: 'number',
+                additionalParams: true,
+                optional: true,
+                show: {
+                    searchType: ['mmr']
+                }
+            },
+            {
+                label: 'Lambda',
+                name: 'lambda',
+                description:
+                    'Number between 0 and 1 that determines the degree of diversity among the results, where 0 corresponds to maximum diversity and 1 to minimum diversity. Used only when the search type is MMR',
+                placeholder: '0.5',
+                type: 'number',
+                additionalParams: true,
+                optional: true,
+                show: {
+                    searchType: ['mmr']
+                }
+            },
+            {
+                label: 'Alpha',
+                name: 'alpha',
+                description:
+                    'Number between 0 and 1 that determines the weighting of keyword (BM25) portion of the hybrid search. A value of 1 is a pure vector search, while 0 is a pure keyword search.',
+                placeholder: '1',
+                type: 'number',
+                additionalParams: true,
+                optional: true,
+                show: {
+                    searchType: ['hybrid']
+                }
+            },
+            {
+                label: 'fusionType',
+                name: 'fusionType',
+                type: 'options',
+                default: 'RelativeScore',
+                description:
+                    "Method to merge results: 'Ranked' combines by document rank, while 'RelativeScore' combines by normalized scores.",
+                options: [
+                    {
+                        label: 'RelativeScore',
+                        name: 'RelativeScore'
+                    },
+                    {
+                        label: 'Ranked',
+                        name: 'Ranked'
+                    }
+                ],
+                additionalParams: true,
+                optional: true,
+                show: {
+                    searchType: ['hybrid']
+                }
             }
         ]
-        addMMRInputParams(this.inputs)
-        this.inputs.push({
-            label: 'Alpha (for Hybrid Search)',
-            name: 'alpha',
-            description:
-                'Number between 0 and 1 that determines the weighting of keyword (BM25) portion of the hybrid search. A value of 1 is a pure vector search, while 0 is a pure keyword search.',
-            placeholder: '1',
-            type: 'number',
-            additionalParams: true,
-            optional: true
-        })
+
         this.outputs = [
             {
                 label: 'Weaviate Retriever',
@@ -307,7 +379,7 @@ class Weaviate_VectorStores implements INode {
 
             try {
                 if (recordManager) {
-                    const vectorStore = (await WeaviateStore.fromExistingIndex(embeddings, obj)) as unknown as VectorStore
+                    const vectorStore = (await WeaviateStore.fromExistingIndex(embeddings, obj)) as unknown as WeaviateStore
                     await recordManager.createSchema()
                     const res = await index({
                         docsSource: finalDocs,
@@ -394,6 +466,12 @@ class Weaviate_VectorStores implements INode {
         const weaviateIndex = nodeData.inputs?.weaviateIndex as string
         const weaviateTextKey = nodeData.inputs?.weaviateTextKey as string
         const weaviateMetadataKeys = nodeData.inputs?.weaviateMetadataKeys as string
+        const output = nodeData.outputs?.output as string
+        const searchType = nodeData.inputs?.searchType as string
+        const fusionType = nodeData.inputs?.fusionType as string
+        const topK = nodeData.inputs?.topK as string
+        const k = topK ? parseInt(topK, 10) : 4
+        const alpha = nodeData.inputs?.alpha
         const embeddings = nodeData.inputs?.embeddings as Embeddings
         let weaviateFilter = nodeData.inputs?.weaviateFilter
 
@@ -418,12 +496,50 @@ class Weaviate_VectorStores implements INode {
         if (weaviateTextKey) obj.textKey = weaviateTextKey
         if (weaviateMetadataKeys) obj.metadataKeys = JSON.parse(weaviateMetadataKeys.replace(/\s/g, ''))
         if (weaviateFilter) {
-            weaviateFilter = typeof weaviateFilter === 'object' ? weaviateFilter : parseJsonBody(weaviateFilter)
+            const rawFilter = typeof weaviateFilter === 'object' ? weaviateFilter : parseJsonBody(weaviateFilter)
+            weaviateFilter = processSearchFilter(rawFilter, client, weaviateIndex)
         }
 
-        const vectorStore = (await WeaviateStore.fromExistingIndex(embeddings, obj)) as unknown as VectorStore
+        const vectorStore = (await WeaviateStore.fromExistingIndex(embeddings, obj)) as unknown as WeaviateStore
 
-        return resolveVectorStoreOrRetriever(nodeData, vectorStore, weaviateFilter)
+        if (output === 'retriever') {
+            if ('mmr' === searchType) {
+                const fetchK = nodeData.inputs?.fetchK as string
+                const lambda = nodeData.inputs?.lambda as string
+                const f = fetchK ? parseInt(fetchK, 10) : 20
+                const l = lambda ? parseFloat(lambda) : 0.5
+                return vectorStore.asRetriever({
+                    searchType: 'mmr',
+                    k: k,
+                    filter: weaviateFilter,
+                    searchKwargs: {
+                        fetchK: f,
+                        lambda: l
+                    }
+                })
+            } else if ('hybrid' === searchType) {
+                // Parse via String() so a numeric 0 (pure keyword search) is not mistaken for "unset".
+                const parsedAlpha = parseFloat(String(alpha ?? ''))
+                return new HybridSearchRetriever({
+                    vectorStore: vectorStore,
+                    alpha: Number.isNaN(parsedAlpha) ? 1 : parsedAlpha,
+                    topK: k,
+                    fusionType: fusionType ?? 'RelativeScore',
+                    filter: weaviateFilter
+                })
+            } else {
+                return vectorStore.asRetriever({
+                    k: k,
+                    filter: weaviateFilter
+                })
+            }
+        }
+
+        // Any other output (including 'vectorStore') returns the store itself, so init never
+        // resolves to undefined; the filter travels with the store for downstream nodes.
+        ;(vectorStore as any).k = k
+        if (weaviateFilter) (vectorStore as any).filter = weaviateFilter
+        return vectorStore
     }
 }
 