Files
DM_rewrite_3.31/booway_kg_api/ceshi.py
T
2025-03-31 15:17:47 +08:00

165 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# import pandas as pd
# import chardet
#
# # 检测文件编码的函数
# def detect_encoding(file_path):
# with open(file_path, 'rb') as f:
# result = chardet.detect(f.read())
# return result['encoding']
#
# # Step 1: 读取 A.csv 并提取 input 列
# A_encoding = detect_encoding('D:/工作簿3.csv')
# A_df = pd.read_csv('D:/工作簿3.csv', encoding=A_encoding)
# list1 = A_df['input'].tolist()
#
# # Step 2: 读取 B.csv,基于 list1 中的内容查找匹配项
# B_encoding = detect_encoding('D:/工作簿2.csv')
# B_df = pd.read_csv('D:/工作簿2.csv', encoding=B_encoding)
#
# # 创建 list2 存储匹配到的 answer 列内容
# list2 = []
#
# # 遍历 list1,查找 B.csv 中的匹配项
# for item in list1:
# match = B_df[B_df['query'] == item]
# if not match.empty:
# # 如果有匹配项,取第一个匹配的 answer 值
# list2.append(match['answer'].values[0])
# else:
# # 如果没有匹配项,填充为 NaN 或其他默认值
# list2.append(None)
#
# # Step 3: 将 list2 添加到 A.csv 的 output 列
# A_df['output'] = list2
#
# # 保存修改后的 A.csv 文件
# A_df.to_csv('D:/A_updated2.csv', index=False, encoding='utf-8') # 保存时也指定编码,确保兼容性
#
# print("A.csv 已成功更新为 A_updated.csv")
import pandas as pd
import re
# 读取CSV文件
# df = pd.read_csv('D:/2.10.15.11.csv', encoding='utf-8')
#
# # 提取'点彩原因'列内容到list
# dian_cai_list = df['点彩原因'].tolist()
#
# # 初始化'点彩原因2'列
# df['点彩原因2'] = ''
#
# # 遍历列表,查找并提取"回答错误"或"回答正确"之后的内容,并修改原列
# for idx, content in enumerate(dian_cai_list):
# if isinstance(content, str): # 确保内容是字符串
# match = re.match(r'(回答错误|回答正确|回答不出)[,]?\s*(.*)', content)
# if match:
# # 提取关键词(回答错误或回答正确)
# keyword = match.group(1)
# # 提取关键词后的内容
# extracted_content = match.group(2).strip()
#
# # 更新'点彩原因2'列
# df.at[idx, '点彩原因2'] = extracted_content
# # 修改'点彩原因'列,只保留关键词
# df.at[idx, '点彩原因'] = keyword
#
# # 保存更新后的CSV文件
# df.to_csv('D:/updated_file3.csv', index=False, encoding='utf-8')
# import requests
#
# url = "http://10.1.16.39:2333/intent_recognition"
# data = {
# "query": "BDY3是什么软件做的工程"
# }
#
# response = requests.post(url, json=data)
# print(response.json())
# import wikipedia
#
#
# def get_wikipedia_summary(term, lang="zh", sentences=2):
# """
# 使用 Wikipedia API 获取输入名词的解释或描述。
#
# 参数:
# - term: 需要查询的名词(字符串)
# - lang: 语言(默认 "zh" 为中文)
# - sentences: 返回的句子数量(默认 2 句)
#
# 返回:
# - Wikipedia 解释(字符串)
# """
# try:
# wikipedia.set_lang(lang) # 设置语言
# summary = wikipedia.summary(term, sentences=sentences)
# return summary
# except wikipedia.exceptions.DisambiguationError as e:
# return f"查询词 '{term}' 可能指多个内容:{', '.join(e.options[:5])}..."
# except wikipedia.exceptions.PageError:
# return f"未找到 '{term}' 的相关 Wikipedia 页面。"
# except Exception as e:
# return f"发生错误:{e}"
#
#
# # 测试示例
# print(get_wikipedia_summary("人工智能")) # 获取 "人工智能" 的解释
# import wikipedia
#
# def get_wikipedia_summary(term, lang="zh"):
# """
# 查询 Wikipedia API 获取名词的摘要信息。
#
# :param term: 要查询的名词
# :param lang: 语言(默认中文 'zh')
# :return: 该名词的 Wikipedia 摘要信息
# """
# try:
# wikipedia.set_lang(lang) # 设置语言
# summary = wikipedia.summary(term, sentences=3) # 获取前3句话摘要
# return summary
# except wikipedia.exceptions.DisambiguationError as e:
# return f"查询 '{term}' 有多个可能的结果,请更具体:\n{e.options[:5]}"
# except wikipedia.exceptions.PageError:
# return f"未找到 '{term}' 的 Wikipedia 词条。"
# except Exception as e:
# return f"查询失败,错误信息: {e}"
#
# import zhconv
#
# # 示例调用
# term = "历史版本"
# result = get_wikipedia_summary(term)
# traditional_text = zhconv.convert(result, 'zh-cn')
# print(traditional_text)
# # print(result)
import pandas as pd
# Read the source CSV file.
file_path = "D:/测试集3.3.csv"  # replace with your own file path
df = pd.read_csv(file_path, encoding='utf-8')

# Only filter when the 'query' column is present; otherwise report the problem.
if 'query' in df.columns:
    # Keep rows whose 'query' value contains the literal text '西藏'.
    # na=False treats missing values as "no match" instead of propagating NaN
    # into the boolean mask; regex=False makes the match a plain substring
    # test rather than a regular-expression search.
    filtered_df = df[df['query'].str.contains('西藏', na=False, regex=False)]
    # Write the filtered rows to a new CSV file, omitting the index column.
    output_path = "D:/测试集_西藏.csv"
    filtered_df.to_csv(output_path, index=False, encoding='utf-8')
    print(f"筛选后的数据已保存到 {output_path}")
else:
    print("CSV 文件中没有 'query' 列,请检查文件格式!")