Qwen3-32B JSON格式化输出不稳定,微调输出格式
This commit is contained in:
+43
-43
@@ -1,46 +1,3 @@
|
||||
sk-cxlvgeuxavxfcajprxietuqyqjngtbrwrmrmrioxmgtbkpci
|
||||
sk-vjjsuzntqbhcmelfsuquqyoxjivxcfwyxnrhpwzobgxlpmrv
|
||||
sk-hbgctnpvntsnelveaudpekyncfgstdfazezboxmcgjvudzyg
|
||||
sk-cgdcrzedpetwuysiebjmioqkuqjsoouglspwtjvadgzosmwt
|
||||
sk-ymqpbisxufhthdjxerztucgecakvcsgajsdiicauvnaandir
|
||||
sk-xhmfarttmloxepegtvodtdwvpjveanvjigmkrjumxojkuydb
|
||||
sk-kdfrkixwoeiuzdsdtystcxbhvwzeschacrnvhfiupnilnpoo
|
||||
sk-kelmbfsruwmjgrzzbjkqpqasabrnuskongtqxwzrfrpefdgz
|
||||
sk-mtnlhcosddoztgmdgjmtlniqsrkfiskafmthtmnyzqmdfcib
|
||||
sk-cnrleygygpuqogpqyvuqnhubkwschrtcpjlldsdhxrkhgttf
|
||||
sk-xgkzyzptybrnxanvugkynczygyfavddwwzhbgoyxkhgaykci
|
||||
sk-nfpftnxyiovqlzkqsleadsfnbpfxdxbdpghdwfyucrnzaimh
|
||||
sk-uiuhaihdvthpmwqeehmwufagpxcjunoqnfgpnaitaaqgrjoi
|
||||
sk-mjnkfdboddkikmawuawsaxdsvwyuqxlaoroxcefdtrnycfud
|
||||
sk-xmjbjxxzhfzwpjihhyuhlxidzwmbsrtjdrpgxjgdnywhnovu
|
||||
sk-comhspmqhvnsaqasydwsrttntklshmbvisdlbpenyhmffngx
|
||||
sk-vflgmlkwgaxnddlhvcsuaphsfmkvilhpdgencksucoqwkezu
|
||||
sk-iyjhckfaoavtkwerohssthuygrqnntdaxrkgwgopgpdfdyhw
|
||||
sk-drjnrjwgxxkvpizznkxvjmgphttgfvrssglnljlxtsnpkqnn
|
||||
sk-kzvqvcfzmfjrtpzufbatbgpmyrziqoxzjehwyvtcnxpfxmul
|
||||
sk-movoypamgtzabventkegpyqnzvxunjkvnyvowbxoofxlkyxi
|
||||
sk-fnjeeylfnusqcyyoknqtqotgnkbyidzehufvtnpyctmhoevq
|
||||
sk-vdmkbtdqzhuaslwuzumkyyxxhytvvszqpkfrrwsllpbhcffh
|
||||
sk-tufbncngubzdhcdoydqoflmgigkmbogrxwqjvgmexcbmmhvb
|
||||
sk-svivotzqknqcgwpfcafebytzvwdxywwaioveyjqaxnrhmlps
|
||||
sk-zixqbkoxuvxkituttwtvdatcwpjvpsfyklenfqyoyivcsvcm
|
||||
sk-jebuvmnzsjcorvvcrkgnwvusefssnhcynymwxygsdzmcekpm
|
||||
sk-rfhnquzrxvcatgletqxjuxwwtjvowkfowmmbuwceysbpowep
|
||||
sk-txvxjzjhgijqggeexeszmbvflhjyikmcmmfnwzpyypziolla
|
||||
sk-lcmoiwzzzgmrmtlrdiqrnokroemjshdbfngputxdwbmsiikz
|
||||
sk-zaeziqwmxhsppqspwpwbmhparnlkjmrlhgazeuitfkljzmvy
|
||||
sk-drxthkrbfwirgjujaeaaykzxrpzxxthcpbfxmvqrwjyngbta
|
||||
sk-afffehmmkjbgseltoskctaqfzjtfnpcqmhnclecqjonzvjbc
|
||||
sk-hzmqvpyolijixllzrswwxzcgkezvhlmojssttzzjphadidie
|
||||
sk-utyouvzddsvugfgptkhckrtlzfpeqhxbmwvrnfwlulywsnnp
|
||||
sk-lavdxcqbvvsgpumjlflqnuzwkdsleohkouylerwovzikelux
|
||||
sk-uwdquxtaliiwrdkakpgoismnfcwftfzohcekmdwifokmyktv
|
||||
sk-exbjsnmerjsksepbgqtlvpsmbaptvrbasfbpcrdsblljjpmp
|
||||
sk-ksenojclzsyrhfkjqtmjxfiawopiivujzurnzpsmdasczvzv
|
||||
sk-fqogdmxyfiwjcjfnptthcylfqwyygbtkmhpchqoisxkcmqez
|
||||
sk-vtogkcmdkbdhujtsnhcjdoytafzzurpsjjxmzlbvrjfxtzox
|
||||
sk-twuvbdvmfswczhmuuktztxvfuxvxvdkcmejghjebxtowdbsc
|
||||
sk-iobumflqericicuvflscrsrbmlozlbbueldxlmltgaqvuyhg
|
||||
sk-ufnnhxfxfvmweezskajizwdcqynqmytbbkyctvkjypdyhjny
|
||||
sk-dcrokkzfvdkctooxqcbapzlkrtzchygzwwqvbzqcgghsesjq
|
||||
sk-pzzpojbvjouqdbrvmyyevmxlutcorwcgholvszwjxnkfurss
|
||||
@@ -101,6 +58,49 @@ sk-ufmqbuplpjvzzlzohvsxertwgnguhipsbajxnxecvvccozly
|
||||
sk-rypfoscrczeelowmrsixiuyunyqmqvknaprsnzmdguwzrkzx
|
||||
sk-lucemnosmcxuwedvzilpefuxjnyvaxldpbgaqwnwalxmntul
|
||||
sk-niymkyuzpyovndvvqvpaniiqfgoofnxczhdmjjessiocbeul
|
||||
sk-cxlvgeuxavxfcajprxietuqyqjngtbrwrmrmrioxmgtbkpci
|
||||
sk-vjjsuzntqbhcmelfsuquqyoxjivxcfwyxnrhpwzobgxlpmrv
|
||||
sk-hbgctnpvntsnelveaudpekyncfgstdfazezboxmcgjvudzyg
|
||||
sk-cgdcrzedpetwuysiebjmioqkuqjsoouglspwtjvadgzosmwt
|
||||
sk-ymqpbisxufhthdjxerztucgecakvcsgajsdiicauvnaandir
|
||||
sk-xhmfarttmloxepegtvodtdwvpjveanvjigmkrjumxojkuydb
|
||||
sk-kdfrkixwoeiuzdsdtystcxbhvwzeschacrnvhfiupnilnpoo
|
||||
sk-kelmbfsruwmjgrzzbjkqpqasabrnuskongtqxwzrfrpefdgz
|
||||
sk-mtnlhcosddoztgmdgjmtlniqsrkfiskafmthtmnyzqmdfcib
|
||||
sk-cnrleygygpuqogpqyvuqnhubkwschrtcpjlldsdhxrkhgttf
|
||||
sk-xgkzyzptybrnxanvugkynczygyfavddwwzhbgoyxkhgaykci
|
||||
sk-nfpftnxyiovqlzkqsleadsfnbpfxdxbdpghdwfyucrnzaimh
|
||||
sk-uiuhaihdvthpmwqeehmwufagpxcjunoqnfgpnaitaaqgrjoi
|
||||
sk-mjnkfdboddkikmawuawsaxdsvwyuqxlaoroxcefdtrnycfud
|
||||
sk-xmjbjxxzhfzwpjihhyuhlxidzwmbsrtjdrpgxjgdnywhnovu
|
||||
sk-comhspmqhvnsaqasydwsrttntklshmbvisdlbpenyhmffngx
|
||||
sk-vflgmlkwgaxnddlhvcsuaphsfmkvilhpdgencksucoqwkezu
|
||||
sk-iyjhckfaoavtkwerohssthuygrqnntdaxrkgwgopgpdfdyhw
|
||||
sk-drjnrjwgxxkvpizznkxvjmgphttgfvrssglnljlxtsnpkqnn
|
||||
sk-kzvqvcfzmfjrtpzufbatbgpmyrziqoxzjehwyvtcnxpfxmul
|
||||
sk-movoypamgtzabventkegpyqnzvxunjkvnyvowbxoofxlkyxi
|
||||
sk-fnjeeylfnusqcyyoknqtqotgnkbyidzehufvtnpyctmhoevq
|
||||
sk-vdmkbtdqzhuaslwuzumkyyxxhytvvszqpkfrrwsllpbhcffh
|
||||
sk-tufbncngubzdhcdoydqoflmgigkmbogrxwqjvgmexcbmmhvb
|
||||
sk-svivotzqknqcgwpfcafebytzvwdxywwaioveyjqaxnrhmlps
|
||||
sk-zixqbkoxuvxkituttwtvdatcwpjvpsfyklenfqyoyivcsvcm
|
||||
sk-jebuvmnzsjcorvvcrkgnwvusefssnhcynymwxygsdzmcekpm
|
||||
sk-rfhnquzrxvcatgletqxjuxwwtjvowkfowmmbuwceysbpowep
|
||||
sk-txvxjzjhgijqggeexeszmbvflhjyikmcmmfnwzpyypziolla
|
||||
sk-lcmoiwzzzgmrmtlrdiqrnokroemjshdbfngputxdwbmsiikz
|
||||
sk-zaeziqwmxhsppqspwpwbmhparnlkjmrlhgazeuitfkljzmvy
|
||||
sk-drxthkrbfwirgjujaeaaykzxrpzxxthcpbfxmvqrwjyngbta
|
||||
sk-afffehmmkjbgseltoskctaqfzjtfnpcqmhnclecqjonzvjbc
|
||||
sk-hzmqvpyolijixllzrswwxzcgkezvhlmojssttzzjphadidie
|
||||
sk-utyouvzddsvugfgptkhckrtlzfpeqhxbmwvrnfwlulywsnnp
|
||||
sk-lavdxcqbvvsgpumjlflqnuzwkdsleohkouylerwovzikelux
|
||||
sk-uwdquxtaliiwrdkakpgoismnfcwftfzohcekmdwifokmyktv
|
||||
sk-exbjsnmerjsksepbgqtlvpsmbaptvrbasfbpcrdsblljjpmp
|
||||
sk-ksenojclzsyrhfkjqtmjxfiawopiivujzurnzpsmdasczvzv
|
||||
sk-fqogdmxyfiwjcjfnptthcylfqwyygbtkmhpchqoisxkcmqez
|
||||
sk-vtogkcmdkbdhujtsnhcjdoytafzzurpsjjxmzlbvrjfxtzox
|
||||
sk-twuvbdvmfswczhmuuktztxvfuxvxvdkcmejghjebxtowdbsc
|
||||
sk-iobumflqericicuvflscrsrbmlozlbbueldxlmltgaqvuyhg
|
||||
sk-gkdrgcbrlddkgflolrqkpqyygmuszueozeoiwdqpeqghlths
|
||||
sk-gjqsqttmnydxavthjsjnirqxprogmvidmocfeoixuwutiqmd
|
||||
sk-pihgxlhhdsyeowjsbhbioxwqjbonwunteaubfisduzyibkzw
|
||||
|
||||
@@ -107,9 +107,34 @@ class Classification(BaseModel):
|
||||
vertical_classification:str = Field(description="垂直领域一级分类")
|
||||
sub_classification:str = Field(description="一级分类下的二级分类")
|
||||
|
||||
@classmethod
|
||||
def get_format_instructions(cls):
|
||||
return """
|
||||
格式如下,必须严格以纯JSON格式输出
|
||||
{
|
||||
"vertical_classification": "垂直领域一级分类",
|
||||
"sub_classification": "一级分类下的二级分类"
|
||||
}
|
||||
字段说明:
|
||||
vertical_classification 类型:str 描述:垂直领域一级分类
|
||||
sub_classification 类型:str 描述:一级分类下的二级分类
|
||||
|
||||
"""
|
||||
|
||||
class QueryRewrite(BaseModel):
|
||||
rewrite:str = Field(description="问题改写")
|
||||
|
||||
@classmethod
|
||||
def get_format_instructions(cls):
|
||||
return """
|
||||
格式如下:必须严格以纯JSON格式输出
|
||||
{
|
||||
"rewrite": "问题改写"
|
||||
}
|
||||
字段说明:
|
||||
rewrite 类型:str 描述:问题改写之后的内容
|
||||
"""
|
||||
|
||||
##########################槽位模型###########################
|
||||
class SlotBase(BaseModel):
|
||||
"""槽位基础模型"""
|
||||
@@ -312,6 +337,22 @@ class StepBackPrompt(BaseModel):
|
||||
can_use_back_prompt: bool = Field(description="原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语,则不能进行后退提示")
|
||||
step_back_query: List[str] = Field(description="后退提示生成的抽象查询(多个)")
|
||||
|
||||
@classmethod
|
||||
def get_format_instructions(cls):
|
||||
return """
|
||||
格式如下,必须严格以纯JSON格式输出
|
||||
{
|
||||
"original_query": "原始查询",
|
||||
"can_use_back_prompt": "原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语,则不能进行后退提示",
|
||||
"step_back_query": "后退提示生成的抽象查询(多个)"
|
||||
}
|
||||
字段说明:
|
||||
original_query 类型:str 描述:用户输入的原始查询
|
||||
can_use_back_prompt 类型:bool 描述:原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语,则不能进行后退提示
|
||||
step_back_query 类型:List[str] 描述:后退提示生成的抽象查询(多个)
|
||||
"""
|
||||
|
||||
|
||||
class FollowUpQuestions(BaseModel):
|
||||
"""后续问题数据模型"""
|
||||
original_query: str = Field(description="原始查询")
|
||||
|
||||
@@ -188,17 +188,19 @@ class AsyncIntentRecognizer:
|
||||
classification_parser = PydanticOutputParser(pydantic_object=Classification)
|
||||
formatted_prompt = classification_prompt.format(user_input=query,
|
||||
classification_info=classification_info,
|
||||
output_format=classification_parser.get_format_instructions(),
|
||||
output_format=Classification.get_format_instructions(),
|
||||
# conversation_context=conversation_context,
|
||||
chat_history=json.dumps(chat_history, ensure_ascii=False))
|
||||
# 解析输出
|
||||
try:
|
||||
# 异步调用LLM
|
||||
response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
|
||||
# 尝试直接解析JSON响应
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
parsed_output = classification_parser.parse(clean_output)
|
||||
|
||||
# 计算并打印耗时
|
||||
@@ -262,11 +264,13 @@ class AsyncIntentRecognizer:
|
||||
formatted_prompt = formatted_prompt.replace("{output_format}", terms_list_parser.get_format_instructions())
|
||||
|
||||
# 异步调用LLM
|
||||
response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
|
||||
# 尝试使用Pydantic解析器解析TermList
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
parsed_output = terms_list_parser.parse(clean_output)
|
||||
return parsed_output.terms
|
||||
|
||||
@@ -335,18 +339,22 @@ class AsyncIntentRecognizer:
|
||||
</chat_history>
|
||||
|
||||
1、请从当前提问内容中提取电力造价行中定额编码、定额名称、清单编码、清单名称
|
||||
2、请勿随机编造,如果没有提取到,返回空内容
|
||||
3、返回结果为json格式
|
||||
2、请勿随机编造,如果没有提取到内容返回空的JSON
|
||||
3、返回结果为json格式,必须严格以纯JSON格式输出
|
||||
```json
|
||||
{{
|
||||
"dinge_info_list":{{"dinge_code_list":["xxxx","xxxx"], "dinge_name_list":["xxxx","xxxx"]}},
|
||||
"qingdan_info":{{"qingdan_code_list":["xxxx","xxxx"], "qingdan_name_list":["xxxx","xxxx"]}}
|
||||
}}
|
||||
```json
|
||||
"""
|
||||
|
||||
try:
|
||||
response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(prompt, extra_body={"enable_thinking": False})
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
parsed_output = JsonOutputParser().parse(clean_output)
|
||||
|
||||
# 计算并打印耗时
|
||||
@@ -378,16 +386,18 @@ class AsyncIntentRecognizer:
|
||||
keywords_str = json.dumps(terms_dict, ensure_ascii=False)
|
||||
query_rewrite_parser = PydanticOutputParser(pydantic_object=QueryRewrite)
|
||||
formatted_prompt = query_rewrite_prompt_pro.format(query=query,
|
||||
output_format=query_rewrite_parser.get_format_instructions(),
|
||||
output_format=QueryRewrite.get_format_instructions(),
|
||||
keywords=keywords_str,
|
||||
chat_history=chat_history,
|
||||
context=context)
|
||||
# 解析输出
|
||||
try:
|
||||
# 异步调用LLM
|
||||
response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
parsed_output = query_rewrite_parser.parse(clean_output)
|
||||
end_time = time.time()
|
||||
process_time=end_time-start_time
|
||||
@@ -630,9 +640,11 @@ class AsyncIntentRecognizer:
|
||||
)
|
||||
try:
|
||||
# 异步调用LLM
|
||||
response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
# 尝试解析LLM响应
|
||||
parsed_output = slot_parser.parse(clean_output)
|
||||
return parsed_output
|
||||
@@ -660,16 +672,18 @@ class AsyncIntentRecognizer:
|
||||
query=query,
|
||||
chat_history=json.dumps(chat_history, ensure_ascii=False) if chat_history else "[]",
|
||||
# conversation_context=conversation_context,
|
||||
output_format=step_back_parser.get_format_instructions()
|
||||
output_format=StepBackPrompt.get_format_instructions()
|
||||
)
|
||||
|
||||
try:
|
||||
# 异步调用LLM
|
||||
response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
|
||||
# response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
|
||||
|
||||
# 解析输出
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
clean_output = re.sub(r'\s+', '', clean_output)
|
||||
parsed_output = step_back_parser.parse(clean_output)
|
||||
step_back_end_time = time.time()
|
||||
step_back_time = step_back_end_time - step_back_start_time
|
||||
|
||||
@@ -298,7 +298,7 @@ step_back_prompt = """
|
||||
后退问题:
|
||||
{{
|
||||
"original_query": "2023版本如何在Windows 11系统上导入单位工程量清单?",
|
||||
"can_use_back_prompt": True,
|
||||
"can_use_back_prompt": true,
|
||||
"step_back_query": ["如何在Windows 11系统上导入单位工程量清单?", "如何导入单位工程量清单?"]
|
||||
}}
|
||||
|
||||
@@ -306,7 +306,7 @@ step_back_prompt = """
|
||||
后退问题:
|
||||
{{
|
||||
"original_query": "某个设备更换后,如何在系统中更新对应的定额?",
|
||||
"can_use_back_prompt": True,
|
||||
"can_use_back_prompt": true,
|
||||
"step_back_query": ["如何更新设备对应的定额?", "如何更新定额?"]
|
||||
}}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user