165 lines
5.1 KiB
Python
165 lines
5.1 KiB
Python
# import pandas as pd
|
||
# import chardet
|
||
#
|
||
# # 检测文件编码的函数
|
||
# def detect_encoding(file_path):
|
||
# with open(file_path, 'rb') as f:
|
||
# result = chardet.detect(f.read())
|
||
# return result['encoding']
|
||
#
|
||
# # Step 1: 读取 A.csv 并提取 input 列
|
||
# A_encoding = detect_encoding('D:/工作簿3.csv')
|
||
# A_df = pd.read_csv('D:/工作簿3.csv', encoding=A_encoding)
|
||
# list1 = A_df['input'].tolist()
|
||
#
|
||
# # Step 2: 读取 B.csv,基于 list1 中的内容查找匹配项
|
||
# B_encoding = detect_encoding('D:/工作簿2.csv')
|
||
# B_df = pd.read_csv('D:/工作簿2.csv', encoding=B_encoding)
|
||
#
|
||
# # 创建 list2 存储匹配到的 answer 列内容
|
||
# list2 = []
|
||
#
|
||
# # 遍历 list1,查找 B.csv 中的匹配项
|
||
# for item in list1:
|
||
# match = B_df[B_df['query'] == item]
|
||
# if not match.empty:
|
||
# # 如果有匹配项,取第一个匹配的 answer 值
|
||
# list2.append(match['answer'].values[0])
|
||
# else:
|
||
# # 如果没有匹配项,填充为 NaN 或其他默认值
|
||
# list2.append(None)
|
||
#
|
||
# # Step 3: 将 list2 添加到 A.csv 的 output 列
|
||
# A_df['output'] = list2
|
||
#
|
||
# # 保存修改后的 A.csv 文件
|
||
# A_df.to_csv('D:/A_updated2.csv', index=False, encoding='utf-8') # 保存时也指定编码,确保兼容性
|
||
#
|
||
# print("A.csv 已成功更新为 A_updated.csv")
|
||
|
||
import pandas as pd
|
||
import re
|
||
|
||
# 读取CSV文件
|
||
# df = pd.read_csv('D:/2.10.15.11.csv', encoding='utf-8')
|
||
#
|
||
# # 提取'点彩原因'列内容到list
|
||
# dian_cai_list = df['点彩原因'].tolist()
|
||
#
|
||
# # 初始化'点彩原因2'列
|
||
# df['点彩原因2'] = ''
|
||
#
|
||
# # Iterate over the list, extract the text that follows "回答错误", "回答正确" or "回答不出", and update the original column
|
||
# for idx, content in enumerate(dian_cai_list):
|
||
# if isinstance(content, str): # 确保内容是字符串
|
||
# match = re.match(r'(回答错误|回答正确|回答不出)[,,]?\s*(.*)', content)
|
||
# if match:
|
||
# # Extract the keyword (回答错误, 回答正确 or 回答不出)
|
||
# keyword = match.group(1)
|
||
# # 提取关键词后的内容
|
||
# extracted_content = match.group(2).strip()
|
||
#
|
||
# # 更新'点彩原因2'列
|
||
# df.at[idx, '点彩原因2'] = extracted_content
|
||
# # 修改'点彩原因'列,只保留关键词
|
||
# df.at[idx, '点彩原因'] = keyword
|
||
#
|
||
# # 保存更新后的CSV文件
|
||
# df.to_csv('D:/updated_file3.csv', index=False, encoding='utf-8')
|
||
|
||
|
||
# import requests
|
||
#
|
||
# url = "http://10.1.16.39:2333/intent_recognition"
|
||
# data = {
|
||
# "query": "BDY3是什么软件做的工程"
|
||
# }
|
||
#
|
||
# response = requests.post(url, json=data)
|
||
# print(response.json())
|
||
|
||
|
||
# import wikipedia
|
||
#
|
||
#
|
||
# def get_wikipedia_summary(term, lang="zh", sentences=2):
|
||
# """
|
||
# 使用 Wikipedia API 获取输入名词的解释或描述。
|
||
#
|
||
# 参数:
|
||
# - term: 需要查询的名词(字符串)
|
||
# - lang: 语言(默认 "zh" 为中文)
|
||
# - sentences: 返回的句子数量(默认 2 句)
|
||
#
|
||
# 返回:
|
||
# - Wikipedia 解释(字符串)
|
||
# """
|
||
# try:
|
||
# wikipedia.set_lang(lang) # 设置语言
|
||
# summary = wikipedia.summary(term, sentences=sentences)
|
||
# return summary
|
||
# except wikipedia.exceptions.DisambiguationError as e:
|
||
# return f"查询词 '{term}' 可能指多个内容:{', '.join(e.options[:5])}..."
|
||
# except wikipedia.exceptions.PageError:
|
||
# return f"未找到 '{term}' 的相关 Wikipedia 页面。"
|
||
# except Exception as e:
|
||
# return f"发生错误:{e}"
|
||
#
|
||
#
|
||
# # 测试示例
|
||
# print(get_wikipedia_summary("人工智能")) # 获取 "人工智能" 的解释
|
||
|
||
|
||
# import wikipedia
|
||
#
|
||
# def get_wikipedia_summary(term, lang="zh"):
|
||
# """
|
||
# 查询 Wikipedia API 获取名词的摘要信息。
|
||
#
|
||
# :param term: 要查询的名词
|
||
# :param lang: 语言(默认中文 'zh')
|
||
# :return: 该名词的 Wikipedia 摘要信息
|
||
# """
|
||
# try:
|
||
# wikipedia.set_lang(lang) # 设置语言
|
||
# summary = wikipedia.summary(term, sentences=3) # 获取前3句话摘要
|
||
# return summary
|
||
# except wikipedia.exceptions.DisambiguationError as e:
|
||
# return f"查询 '{term}' 有多个可能的结果,请更具体:\n{e.options[:5]}"
|
||
# except wikipedia.exceptions.PageError:
|
||
# return f"未找到 '{term}' 的 Wikipedia 词条。"
|
||
# except Exception as e:
|
||
# return f"查询失败,错误信息: {e}"
|
||
#
|
||
# import zhconv
|
||
#
|
||
# # 示例调用
|
||
# term = "历史版本"
|
||
# result = get_wikipedia_summary(term)
|
||
# traditional_text = zhconv.convert(result, 'zh-cn')
|
||
# print(traditional_text)
|
||
# # print(result)
|
||
|
||
|
||
import pandas as pd
|
||
|
||
# Load the source CSV.
file_path = "D:/测试集3.3.csv"  # replace with the path to your own file
df = pd.read_csv(file_path, encoding='utf-8')

# The filter needs a 'query' column; when it is absent, report it and stop.
if 'query' not in df.columns:
    print("CSV 文件中没有 'query' 列,请检查文件格式!")
else:
    # Keep the rows whose 'query' text contains '西藏'.
    # na=False makes missing values count as "no match" instead of NaN.
    mentions_keyword = df['query'].str.contains('西藏', na=False)
    filtered_df = df.loc[mentions_keyword]

    # Write the matching rows to a new CSV, without the index column.
    output_path = "D:/测试集_西藏.csv"
    filtered_df.to_csv(output_path, index=False, encoding='utf-8')

    print(f"筛选后的数据已保存到 {output_path}")
|
||
|
||
|
||
|