📄 classifier.ts • 9367 bytes
/**
* 意图识别 - 分类器(关键词 + 向量语义双路融合)
* Phase 1: 基于关键词 + 模式匹配
* Phase 2: 向量语义相似度辅助(处理口语化/模糊表达)
*/
import { INTENT_PATTERNS, extractParams, type IntentType, type IntentPattern } from './patterns'
/** Outcome of classifying a single user input. */
export interface IntentResult {
/** Intent category assigned to the input. */
type: IntentType
/** Confidence in `type`. */
confidence: number // confidence, in the range 0-1
/** Parameters pulled from the input text; this file fills it via `extractParams`, or `{}` for empty input. */
params: Record<string, any>
/** Tool names looked up in `TOOL_SUGGESTIONS` for the intent; may be undefined or absent. */
suggestedTools?: string[]
/** Fast-path flag; in this file it is copied from the matched pattern's `fastPath`, with a fallback when that is missing. */
fastPath: boolean
/** Human-readable explanation of how the result was reached. */
reason: string
/** Seed sentences returned by the vector search (optional, for debugging). */
vectorMatches?: string[]
}
/**
 * Tool names suggested for each intent type.
 *
 * The map is partial: intent types without an entry here simply yield
 * `undefined` when looked up.
 */
const TOOL_SUGGESTIONS: Partial<Record<IntentType, string[]>> = {
  code: [
    'file_write',
    'bash_run',
  ],
  debug: [
    'grep_search',
    'file_read',
    'bash_run',
  ],
  file: [
    'file_read',
    'file_write',
    'file_edit',
    'list_dir',
  ],
  search: [
    'grep_search',
    'file_read',
  ],
  test: [
    'bash_run',
    'file_write',
  ],
  config: [
    'file_read',
    'file_write',
    'bash_run',
  ],
  skill: [
    'bash_run',
    'file_read',
  ],
}
/**
 * Scores how well `text` matches one intent pattern (keywords + regexes).
 *
 * Keyword matching is case-insensitive and each keyword contributes once:
 *   - 20 when the whole text equals the keyword,
 *   - 10 when the text starts with the keyword,
 *   - 5 when the keyword occurs anywhere else in the text.
 * Every entry of `pattern.regex` that matches the original (not lowercased)
 * text adds 15.
 *
 * @param text - Input text to score.
 * @param pattern - Pattern supplying `keywords` and, optionally, `regex`.
 * @returns The sum of all contributions; 0 when nothing matches.
 */
function calculateScore(text: string, pattern: IntentPattern): number {
  const lowerText = text.toLowerCase()
  let score = 0

  // Keyword matching
  for (const keyword of pattern.keywords) {
    const lowerKeyword = keyword.toLowerCase()
    if (!lowerText.includes(lowerKeyword)) {
      continue
    }
    if (lowerText === lowerKeyword) {
      // An exact match carries the highest weight.
      score += 20
    } else if (lowerText.startsWith(lowerKeyword)) {
      score += 10
    } else {
      score += 5
    }
  }

  // Regex matching
  if (pattern.regex) {
    for (const regex of pattern.regex) {
      // A global/sticky RegExp remembers `lastIndex` between `test` calls.
      // The pattern objects are reused across calls, so without this reset
      // the same input could match on one call and fail on the next.
      regex.lastIndex = 0
      const matched = regex.test(text)
      // Leave the shared RegExp in a clean state for other users.
      regex.lastIndex = 0
      if (matched) {
        score += 15
      }
    }
  }

  return score
}
/**
 * Keyword-based classification (synchronous fast path).
 *
 * Scores the trimmed input against every entry of `INTENT_PATTERNS` with
 * `calculateScore` and keeps the first pattern that reaches the highest
 * positive score. The confidence is `(score / 20) * (priority / 100)`,
 * capped at 1.
 *
 * @param input - Raw user input; surrounding whitespace is ignored.
 * @returns The winning type, its confidence and raw score, and the matched
 *   pattern. For blank input or when no pattern scores above 0 the type is
 *   `'unknown'`, confidence and score are 0, and `pattern` is `null`.
 */
function classifyByKeywords(input: string): { type: IntentType; confidence: number; score: number; pattern: IntentPattern | null } {
  const text = input.trim()
  if (text.length === 0) {
    return { type: 'unknown', confidence: 0, score: 0, pattern: null }
  }

  type Leader = { pattern: IntentPattern | null; score: number }
  const start: Leader = { pattern: null, score: 0 }

  // Strict `>` keeps the earliest pattern when several share the top score.
  const leader = INTENT_PATTERNS.reduce(
    (best: Leader, candidate: IntentPattern): Leader => {
      const candidateScore = calculateScore(text, candidate)
      return candidateScore > best.score
        ? { pattern: candidate, score: candidateScore }
        : best
    },
    start,
  )

  const { pattern, score } = leader
  const confidence =
    pattern && score > 0
      ? Math.min(1, (score / 20) * (pattern.priority / 100))
      : 0

  return {
    type: pattern?.type || 'unknown',
    confidence,
    score,
    pattern,
  }
}
/**
 * Vector-semantic classification (asynchronous; performs a vector search).
 *
 * Looks up the 5 entries of the `__intent_seeds__` collection closest to
 * `input`, classifies each returned seed text with `classifyByKeywords`, and
 * lets the seeds vote for an intent type. Each vote is weighted by the seed's
 * similarity, computed as `1 - distance`.
 *
 * The confidence is derived from the highest similarity among all results:
 * above 0.6 it is `similarity * 0.9` capped at 0.8, otherwise
 * `similarity * 0.6`. It is floored at 0 so it stays inside the 0-1 range
 * that `IntentResult.confidence` documents.
 *
 * @param input - Text to classify.
 * @returns The winning intent type (`'unknown'` when no seed could be
 *   classified), the confidence, and the matched seed texts; `null` when the
 *   search returns no results or anything in the lookup throws.
 */
async function classifyByVector(input: string): Promise<{
  type: IntentType
  confidence: number
  matches: string[]
} | null> {
  try {
    // Imported lazily to avoid a circular dependency.
    const { searchVectors } = await import('../memory/vectorSearch')

    // Pure vector search for the 5 nearest seeds; results carry `distance`.
    const results = await searchVectors(input, '__intent_seeds__', 5)
    if (results.length === 0) {
      return null
    }

    // Smaller distance means more similar, so similarity is `1 - distance`.
    const maxSimilarity = Math.max(...results.map(r => 1 - r.distance))

    // Infer each seed's intent via keyword classification and accumulate
    // similarity-weighted votes per intent type.
    const typeScores = new Map<IntentType, number>()
    for (const result of results) {
      const seedClassification = classifyByKeywords(result.content)
      if (seedClassification.type === 'unknown') {
        continue
      }
      const similarity = 1 - result.distance
      const current = typeScores.get(seedClassification.type) || 0
      typeScores.set(seedClassification.type, current + similarity)
    }

    // Pick the intent type with the highest accumulated vote.
    let bestType: IntentType = 'unknown'
    let bestScore = 0
    for (const [type, score] of typeScores) {
      if (score > bestScore) {
        bestScore = score
        bestType = type
      }
    }

    // NOTE(review): confidence uses the best similarity over all results,
    // not only over the seeds that voted for `bestType` - confirm intended.
    const rawConfidence =
      maxSimilarity > 0.6 ? Math.min(0.8, maxSimilarity * 0.9) : maxSimilarity * 0.6
    // The distance metric is not visible here; a distance above 1 would make
    // the similarity (and the confidence) negative, so floor it at 0.
    const confidence = Math.max(0, rawConfidence)

    return {
      type: bestType,
      confidence,
      matches: results.map(r => r.content),
    }
  } catch {
    // Deliberate best-effort: a failed vector search must not break the
    // main classification flow, so the caller just sees "no result".
    return null
  }
}
/**
 * Main classification entry point (synchronous, kept for backward
 * compatibility).
 *
 * Uses keyword matching only, for callers that do not need vector search.
 * The keyword result is accepted when its confidence is at least 0.3 or its
 * raw score is at least 10; otherwise the input is reported as `'unknown'`.
 *
 * @param input - Raw user input; surrounding whitespace is ignored.
 * @returns The classification result. Blank input yields `'unknown'` with
 *   empty `params`.
 */
export function classifyIntent(input: string): IntentResult {
  const text = input.trim()
  if (text.length === 0) {
    return {
      type: 'unknown',
      confidence: 0,
      params: {},
      fastPath: true,
      reason: '空输入',
    }
  }

  const { type, confidence, score, pattern } = classifyByKeywords(text)
  const params = extractParams(text)

  const confident = confidence >= 0.3 || score >= 10
  if (!confident) {
    // Too weak to trust; the async variant may refine this via vector search.
    return {
      type: 'unknown',
      confidence: 0,
      params,
      fastPath: true,
      reason: '关键词匹配置信度不足',
    }
  }

  return {
    type,
    confidence,
    params,
    suggestedTools: TOOL_SUGGESTIONS[type],
    fastPath: pattern?.fastPath ?? true,
    reason: `关键词匹配: ${type},分数 ${score}`,
  }
}
/**
 * Asynchronous classification that fuses keyword matching with vector
 * semantics.
 *
 * Order of decisions:
 *   1. Blank input is reported as `'unknown'` immediately.
 *   2. A strong keyword hit (confidence >= 0.5 or score >= 15) is returned
 *      right away, skipping the vector search.
 *   3. Otherwise `classifyByVector` is consulted; its result wins when it
 *      names a known type with higher confidence than the keyword result.
 *   4. Failing that, any keyword result with a positive score is returned.
 *   5. If nothing matched, the input is reported as `'unknown'`.
 *
 * @param input - Raw user input; surrounding whitespace is ignored.
 * @returns A promise resolving to the classification result.
 */
export async function classifyIntentAsync(input: string): Promise<IntentResult> {
  const text = input.trim()
  if (text.length === 0) {
    return {
      type: 'unknown',
      confidence: 0,
      params: {},
      fastPath: true,
      reason: '空输入',
    }
  }

  // Step 1: keyword classification (cheap).
  const kw = classifyByKeywords(text)

  // Builds the keyword-based result lazily; shared by steps 2 and 4.
  const keywordResult = (): IntentResult => ({
    type: kw.type,
    confidence: kw.confidence,
    params: extractParams(text),
    suggestedTools: TOOL_SUGGESTIONS[kw.type],
    fastPath: kw.pattern?.fastPath ?? true,
    reason: `关键词匹配: ${kw.type},分数 ${kw.score}`,
  })

  // Step 2: a strong keyword hit needs no vector search.
  const strongKeywordHit = kw.confidence >= 0.5 || kw.score >= 15
  if (strongKeywordHit) {
    return keywordResult()
  }

  // Step 3: medium/low keyword confidence - try the vector search.
  const vec = await classifyByVector(text)
  if (vec && vec.type !== 'unknown' && vec.confidence > kw.confidence) {
    const { type, confidence, matches } = vec
    const matchedPattern = INTENT_PATTERNS.find(p => p.type === type)
    return {
      type,
      confidence,
      params: extractParams(text),
      suggestedTools: TOOL_SUGGESTIONS[type],
      fastPath: matchedPattern?.fastPath ?? false,
      reason: `向量语义匹配: ${type},相似度 ${confidence.toFixed(2)}`,
      vectorMatches: matches,
    }
  }

  // Step 4: the keyword result is better, or the vector search gave nothing.
  if (kw.score > 0) {
    return keywordResult()
  }

  // Step 5: nothing matched at all.
  return {
    type: 'unknown',
    confidence: 0,
    params: extractParams(text),
    fastPath: true,
    reason: '未匹配到明确意图',
  }
}
/**
 * Classifies several inputs at once (e.g. for analysing message history).
 *
 * Each input goes through the synchronous, keyword-only `classifyIntent`.
 *
 * @param inputs - Texts to classify.
 * @returns One result per input, in the same order.
 */
export function classifyBatch(inputs: string[]): IntentResult[] {
  const classifyOne = (text: string): IntentResult => classifyIntent(text)
  return inputs.map(classifyOne)
}
/** Display labels (emoji + Chinese text) keyed by intent type. */
const INTENT_LABELS: Record<IntentType, string> = {
  code: '💻 代码生成',
  debug: '🔧 调试修复',
  explain: '📖 解释说明',
  refactor: '♻️ 重构优化',
  test: '🧪 测试相关',
  chat: '💬 闲聊问答',
  skill: '🎯 技能执行',
  file: '📁 文件操作',
  search: '🔍 搜索查询',
  config: '⚙️ 配置相关',
  unknown: '❓ 未知',
}

/**
 * Returns the Chinese display label for an intent type.
 *
 * @param type - Intent type to describe.
 * @returns The label for `type`, or the `unknown` label when the table has
 *   no entry for it.
 */
export function getIntentLabel(type: IntentType): string {
  return INTENT_LABELS[type] ?? INTENT_LABELS.unknown
}
/**
 * Returns a Chinese description of why a request does or does not take the
 * fast path.
 *
 * @param fastPath - Whether the request is on the fast path.
 * @param type - Intent type, used to pick the fast-path description.
 * @returns A fixed "complex task" message when `fastPath` is false;
 *   otherwise the per-intent description, or a generic fast-path label when
 *   the table has no entry for `type`.
 */
export function getFastPathReason(fastPath: boolean, type: IntentType): string {
  if (!fastPath) {
    return '复杂任务,需要执行计划'
  }

  // NOTE(review): several entries (debug, refactor, test, ...) talk about
  // needing detailed analysis or planning even though this is the fast-path
  // branch - confirm the wording is intended.
  const fastPathReasons: Record<IntentType, string> = {
    chat: '闲聊问答,直接回复',
    explain: '解释说明类任务,简单直接回复',
    skill: '技能执行,直接调用技能处理器',
    code: '简单代码生成,直接生成',
    debug: '调试任务,需要详细分析',
    refactor: '重构任务,需要详细规划',
    test: '测试任务,需要详细规划',
    file: '文件操作,需要验证路径',
    search: '搜索任务,需要验证查询',
    config: '配置任务,需要验证环境',
    unknown: '未知任务,需要进一步确认',
  }

  return fastPathReasons[type] ?? '快速通道'
}