From 2e91063ad11a02905a543e9cabafc5502518debc Mon Sep 17 00:00:00 2001 From: ouyangyouzhang Date: Tue, 10 Jun 2025 17:00:40 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=AF=B9=E8=AF=9D=E5=A4=84?= =?UTF-8?q?=E7=90=86=E5=8A=9F=E8=83=BD=EF=BC=8C=E4=BC=98=E5=8C=96=E6=84=8F?= =?UTF-8?q?=E5=9B=BE=E8=AF=86=E5=88=AB=E9=80=BB=E8=BE=91=EF=BC=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E7=BB=93=E6=9E=9C=E4=BF=9D=E5=AD=98=E8=87=B3Excel?= =?UTF-8?q?=E7=9A=84=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=9B=B4=E6=96=B0=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E9=A1=B9=E4=BB=A5=E6=94=AF=E6=8C=81=E6=96=B0=E7=9A=84?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=BA=93=E9=A9=B1=E5=8A=A8=E5=92=8CORM?= =?UTF-8?q?=EF=BC=8C=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81=E4=BB=A5=E6=8F=90?= =?UTF-8?q?=E9=AB=98=E5=8F=AF=E8=AF=BB=E6=80=A7=E5=92=8C=E7=BB=B4=E6=8A=A4?= =?UTF-8?q?=E6=80=A7=EF=BC=8C=E5=88=A0=E9=99=A4=E5=86=97=E4=BD=99=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E4=BB=A5=E7=AE=80=E5=8C=96=E9=A1=B9=E7=9B=AE=E7=BB=93?= =?UTF-8?q?=E6=9E=84=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cursorrules | 105 ++++++ poetry.lock | 138 ++++---- pyproject.toml | 2 + rag2_0/demo/intent_recognition_example.py | 109 ++++-- rag2_0/dify/chat_dify_by_workorder.py | 2 +- rag2_0/dify/dify_tool.py | 306 ++++++++++++++++- rag2_0/dify/test_dify_chatapi.py | 113 +++---- rag2_0/dify/workflow_chat.py | 310 ------------------ rag2_0/intent_recognition/DataModels.py | 20 +- .../intent_recognition/IntentRecognition.py | 2 +- rag2_0/tool/APIKeyManager.py | 39 ++- 11 files changed, 653 insertions(+), 493 deletions(-) create mode 100644 .cursorrules delete mode 100644 rag2_0/dify/workflow_chat.py diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..8c67873 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,105 @@ +# Role Definition + +- You are a **Python master**, a highly experienced **tutor**, a **world-renowned ML engineer**, and a **talented data scientist**. +- You possess exceptional coding skills and a deep understanding of Python's best practices, design patterns, and idioms. +- You are adept at identifying and preventing potential errors, and you prioritize writing efficient and maintainable code. +- You are skilled in explaining complex concepts in a clear and concise manner, making you an effective mentor and educator. +- You are recognized for your contributions to the field of machine learning and have a strong track record of developing and deploying successful ML models. +- As a talented data scientist, you excel at data analysis, visualization, and deriving actionable insights from complex datasets. + +# Technology Stack + +- **Python Version:** Python 3.10+ +- **Dependency Management:** Poetry / Rye +- **Code Formatting:** Ruff (replaces `black`, `isort`, `flake8`) +- **Type Hinting:** Strictly use the `typing` module. All functions, methods, and class members must have type annotations. +- **Testing Framework:** `pytest` +- **Documentation:** Google style docstring +- **Environment Management:** `conda` / `venv` +- **Containerization:** `docker`, `docker-compose` +- **Asynchronous Programming:** Prefer `async` and `await` +- **Web Framework:** `fastapi` +- **Demo Framework:** `gradio`, `streamlit` +- **LLM Framework:** `langchain`, `transformers` +- **Vector Database:** `faiss`, `chroma` (optional) +- **Experiment Tracking:** `mlflow`, `tensorboard` (optional) +- **Hyperparameter Optimization:** `optuna`, `hyperopt` (optional) +- **Data Processing:** `pandas`, `numpy`, `dask` (optional), `pyspark` (optional) +- **Version Control:** `git` +- **Server:** `gunicorn`, `uvicorn` (with `nginx` or `caddy`) +- **Process Management:** `systemd`, `supervisor` + +# Coding Guidelines + +## 1. Pythonic Practices + +- **Elegance and Readability:** Strive for elegant and Pythonic code that is easy to understand and maintain. +- **PEP 8 Compliance:** Adhere to PEP 8 guidelines for code style, with Ruff as the primary linter and formatter. +- **Explicit over Implicit:** Favor explicit code that clearly communicates its intent over implicit, overly concise code. +- **Zen of Python:** Keep the Zen of Python in mind when making design decisions. + +## 2. Modular Design + +- **Single Responsibility Principle:** Each module/file should have a well-defined, single responsibility. +- **Reusable Components:** Develop reusable functions and classes, favoring composition over inheritance. +- **Package Structure:** Organize code into logical packages and modules. + +## 3. Code Quality + +- **Comprehensive Type Annotations:** All functions, methods, and class members must have type annotations, using the most specific types possible. +- **Detailed Docstrings:** All functions, methods, and classes must have Google-style docstrings, thoroughly explaining their purpose, parameters, return values, and any exceptions raised. Include usage examples where helpful. +- **Thorough Unit Testing:** Aim for high test coverage (90% or higher) using `pytest`. Test both common cases and edge cases. +- **Robust Exception Handling:** Use specific exception types, provide informative error messages, and handle exceptions gracefully. Implement custom exception classes when needed. Avoid bare `except` clauses. +- **Logging:** Employ the `logging` module judiciously to log important events, warnings, and errors. + +## 4. ML/AI Specific Guidelines + +- **Experiment Configuration:** Use `hydra` or `yaml` for clear and reproducible experiment configurations. +- **Data Pipeline Management:** Employ scripts or tools like `dvc` to manage data preprocessing and ensure reproducibility. +- **Model Versioning:** Utilize `git-lfs` or cloud storage to track and manage model checkpoints effectively. +- **Experiment Logging:** Maintain comprehensive logs of experiments, including parameters, results, and environmental details. +- **LLM Prompt Engineering:** Dedicate a module or files for managing Prompt templates with version control. +- **Context Handling:** Implement efficient context management for conversations, using suitable data structures like deques. + +## 5. Performance Optimization + +- **Asynchronous Programming:** Leverage `async` and `await` for I/O-bound operations to maximize concurrency. +- **Caching:** Apply `functools.lru_cache`, `@cache` (Python 3.9+), or `fastapi.Depends` caching where appropriate. +- **Resource Monitoring:** Use `psutil` or similar to monitor resource usage and identify bottlenecks. +- **Memory Efficiency:** Ensure proper release of unused resources to prevent memory leaks. +- **Concurrency:** Employ `concurrent.futures` or `asyncio` to manage concurrent tasks effectively. +- **Database Best Practices:** Design database schemas efficiently, optimize queries, and use indexes wisely. + +## 6. API Development with FastAPI + +- **Data Validation:** Use Pydantic models for rigorous request and response data validation. +- **Dependency Injection:** Effectively use FastAPI's dependency injection for managing dependencies. +- **Routing:** Define clear and RESTful API routes using FastAPI's `APIRouter`. +- **Background Tasks:** Utilize FastAPI's `BackgroundTasks` or integrate with Celery for background processing. +- **Security:** Implement robust authentication and authorization (e.g., OAuth 2.0, JWT). +- **Documentation:** Auto-generate API documentation using FastAPI's OpenAPI support. +- **Versioning:** Plan for API versioning from the start (e.g., using URL prefixes or headers). +- **CORS:** Configure Cross-Origin Resource Sharing (CORS) settings correctly. + +# Code Example Requirements + +- All functions must include type annotations. +- Must provide clear, Google-style docstrings. +- Key logic should be annotated with comments. +- Provide usage examples (e.g., in the `tests/` directory or as a `__main__` section). +- Include error handling. +- Use `ruff` for code formatting. + +# Others + +- **Prioritize new features in Python 3.10+.** +- **When explaining code, provide clear logical explanations and code comments.** +- **When making suggestions, explain the rationale and potential trade-offs.** +- **If code examples span multiple files, clearly indicate the file name.** +- **Do not over-engineer solutions. Strive for simplicity and maintainability while still being efficient.** +- **Favor modularity, but avoid over-modularization.** +- **Use the most modern and efficient libraries when appropriate, but justify their use and ensure they don't add unnecessary complexity.** +- **When providing solutions or examples, ensure they are self-contained and executable without requiring extensive modifications.** +- **If a request is unclear or lacks sufficient information, ask clarifying questions before proceeding.** +- **Always consider the security implications of your code, especially when dealing with user inputs and external data.** +- **Actively use and promote best practices for the specific tasks at hand (LLM app development, data cleaning, demo creation, etc.).** diff --git a/poetry.lock b/poetry.lock index 34f62ee..eb19471 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2230,6 +2230,26 @@ type = "legacy" url = "http://mirrors.aliyun.com/pypi/simple" reference = "ali-mirrors" +[[package]] +name = "pymysql" +version = "1.1.1" +description = "Pure Python MySQL Driver" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PyMySQL-1.1.1-py3-none-any.whl", hash = "sha256:4de15da4c61dc132f4fb9ab763063e693d521a80fd0e87943b9a453dd4c19d6c"}, + {file = "pymysql-1.1.1.tar.gz", hash = "sha256:e127611aaf2b417403c60bf4dc570124aeb4a57f5f37b8e95ae399a42f904cd0"}, +] + +[package.extras] +ed25519 = ["PyNaCl (>=1.4.0)"] +rsa = ["cryptography"] + +[package.source] +type = "legacy" +url = "http://mirrors.aliyun.com/pypi/simple" +reference = "ali-mirrors" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2579,68 +2599,68 @@ reference = "ali-mirrors" [[package]] name = "sqlalchemy" -version = "2.0.40" +version = "2.0.41" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.40-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae9597cab738e7cc823f04a704fb754a9249f0b6695a6aeb63b74055cd417a96"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37a5c21ab099a83d669ebb251fddf8f5cee4d75ea40a5a1653d9c43d60e20867"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bece9527f5a98466d67fb5d34dc560c4da964240d8b09024bb21c1246545e04e"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:8bb131ffd2165fae48162c7bbd0d97c84ab961deea9b8bab16366543deeab625"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9408fd453d5f8990405cc9def9af46bfbe3183e6110401b407c2d073c3388f47"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-win32.whl", hash = "sha256:00a494ea6f42a44c326477b5bee4e0fc75f6a80c01570a32b57e89cf0fbef85a"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-win_amd64.whl", hash = "sha256:c7b927155112ac858357ccf9d255dd8c044fd9ad2dc6ce4c4149527c901fa4c3"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f1ea21bef99c703f44444ad29c2c1b6bd55d202750b6de8e06a955380f4725d7"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:afe63b208153f3a7a2d1a5b9df452b0673082588933e54e7c8aac457cf35e758"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8aae085ea549a1eddbc9298b113cffb75e514eadbb542133dd2b99b5fb3b6af"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ea9181284754d37db15156eb7be09c86e16e50fbe77610e9e7bee09291771a1"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5434223b795be5c5ef8244e5ac98056e290d3a99bdcc539b916e282b160dda00"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15d08d5ef1b779af6a0909b97be6c1fd4298057504eb6461be88bd1696cb438e"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-win32.whl", hash = "sha256:cd2f75598ae70bcfca9117d9e51a3b06fe29edd972fdd7fd57cc97b4dbf3b08a"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-win_amd64.whl", hash = "sha256:2cbafc8d39ff1abdfdda96435f38fab141892dc759a2165947d1a8fffa7ef596"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = "sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = "sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:50f5885bbed261fc97e2e66c5156244f9704083a674b8d17f24c72217d29baf5"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf0e99cdb600eabcd1d65cdba0d3c91418fee21c4aa1d28db47d095b1064a7d8"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe147fcd85aaed53ce90645c91ed5fca0cc88a797314c70dfd9d35925bd5d106"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf7cee56bd552385c1ee39af360772fbfc2f43be005c78d1140204ad6148438"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4aeb939bcac234b88e2d25d5381655e8353fe06b4e50b1c55ecffe56951d18c2"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c268b5100cfeaa222c40f55e169d484efa1384b44bf9ca415eae6d556f02cb08"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-win32.whl", hash = "sha256:46628ebcec4f23a1584fb52f2abe12ddb00f3bb3b7b337618b80fc1b51177aff"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-win_amd64.whl", hash = "sha256:7e0505719939e52a7b0c65d20e84a6044eb3712bb6f239c6b1db77ba8e173a37"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c884de19528e0fcd9dc34ee94c810581dd6e74aef75437ff17e696c2bfefae3e"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1abb387710283fc5983d8a1209d9696a4eae9db8d7ac94b402981fe2fe2e39ad"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cfa124eda500ba4b0d3afc3e91ea27ed4754e727c7f025f293a22f512bcd4c9"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6b28d303b9d57c17a5164eb1fd2d5119bb6ff4413d5894e74873280483eeb5"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b5a5bbe29c10c5bfd63893747a1bf6f8049df607638c786252cb9243b86b6706"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f0fda83e113bb0fb27dc003685f32a5dcb99c9c4f41f4fa0838ac35265c23b5c"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-win32.whl", hash = "sha256:957f8d85d5e834397ef78a6109550aeb0d27a53b5032f7a57f2451e1adc37e98"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-win_amd64.whl", hash = "sha256:1ffdf9c91428e59744f8e6f98190516f8e1d05eec90e936eb08b257332c5e870"}, - {file = "sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a"}, - {file = "sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6854175807af57bdb6425e47adbce7d20a4d79bbfd6f6d6519cd10bb7109a7f8"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05132c906066142103b83d9c250b60508af556982a385d96c4eaa9fb9720ac2b"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b4af17bda11e907c51d10686eda89049f9ce5669b08fbe71a29747f1e876036"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:c0b0e5e1b5d9f3586601048dd68f392dc0cc99a59bb5faf18aab057ce00d00b2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0b3dbf1e7e9bc95f4bac5e2fb6d3fb2f083254c3fdd20a1789af965caf2d2348"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win32.whl", hash = "sha256:1e3f196a0c59b0cae9a0cd332eb1a4bda4696e863f4f1cf84ab0347992c548c2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win_amd64.whl", hash = "sha256:6ab60a5089a8f02009f127806f777fca82581c49e127f08413a66056bd9166dd"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b1f09b6821406ea1f94053f346f28f8215e293344209129a9c0fcc3578598d7b"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1936af879e3db023601196a1684d28e12f19ccf93af01bf3280a3262c4b6b4e5"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2ac41acfc8d965fb0c464eb8f44995770239668956dc4cdf502d1b1ffe0d747"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c24e0c0fde47a9723c81d5806569cddef103aebbf79dbc9fcbb617153dea30"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23a8825495d8b195c4aa9ff1c430c28f2c821e8c5e2d98089228af887e5d7e29"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:60c578c45c949f909a4026b7807044e7e564adf793537fc762b2489d522f3d11"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win32.whl", hash = "sha256:118c16cd3f1b00c76d69343e38602006c9cfb9998fa4f798606d28d63f23beda"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl", hash = "sha256:7492967c3386df69f80cf67efd665c0f667cee67032090fe01d7d74b0e19bb08"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6375cd674fe82d7aa9816d1cb96ec592bac1726c11e0cafbf40eeee9a4516b5f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8c9fdd15a55d9465e590a402f42082705d66b05afc3ffd2d2eb3c6ba919560"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f9dc8c44acdee06c8fc6440db9eae8b4af8b01e4b1aee7bdd7241c22edff4f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c11ceb9a1f482c752a71f203a81858625d8df5746d787a4786bca4ffdf71c6"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:911cc493ebd60de5f285bcae0491a60b4f2a9f0f5c270edd1c4dbaef7a38fc04"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03968a349db483936c249f4d9cd14ff2c296adfa1290b660ba6516f973139582"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win32.whl", hash = "sha256:293cd444d82b18da48c9f71cd7005844dbbd06ca19be1ccf6779154439eec0b8"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win_amd64.whl", hash = "sha256:3d3549fc3e40667ec7199033a4e40a2f669898a00a7b18a931d3efb4c7900504"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:90144d3b0c8b139408da50196c5cad2a6909b51b23df1f0538411cd23ffa45d3"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:023b3ee6169969beea3bb72312e44d8b7c27c75b347942d943cf49397b7edeb5"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:725875a63abf7c399d4548e686debb65cdc2549e1825437096a0af1f7e374814"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81965cc20848ab06583506ef54e37cf15c83c7e619df2ad16807c03100745dea"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dd5ec3aa6ae6e4d5b5de9357d2133c07be1aff6405b136dad753a16afb6717dd"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ff8e80c4c4932c10493ff97028decfdb622de69cae87e0f127a7ebe32b4069c6"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win32.whl", hash = "sha256:4d44522480e0bf34c3d63167b8cfa7289c1c54264c2950cc5fc26e7850967e45"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win_amd64.whl", hash = "sha256:81eedafa609917040d39aa9332e25881a8e7a0862495fcdf2023a9667209deda"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9a420a91913092d1e20c86a2f5f1fc85c1a8924dbcaf5e0586df8aceb09c9cc2"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:906e6b0d7d452e9a98e5ab8507c0da791856b2380fdee61b765632bb8698026f"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a373a400f3e9bac95ba2a06372c4fd1412a7cee53c37fc6c05f829bf672b8769"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:087b6b52de812741c27231b5a3586384d60c353fbd0e2f81405a814b5591dc8b"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:34ea30ab3ec98355235972dadc497bb659cc75f8292b760394824fab9cf39826"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8280856dd7c6a68ab3a164b4a4b1c51f7691f6d04af4d4ca23d6ecf2261b7923"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win32.whl", hash = "sha256:b50eab9994d64f4a823ff99a0ed28a6903224ddbe7fef56a6dd865eec9243440"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win_amd64.whl", hash = "sha256:5e22575d169529ac3e0a120cf050ec9daa94b6a9597993d1702884f6954a7d71"}, + {file = "sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576"}, + {file = "sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9"}, ] [package.dependencies] @@ -3231,4 +3251,4 @@ reference = "ali-mirrors" [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "2b34e52cba181483fd7ab37d7d9ed92b4703aecc604befbd150d831e84de535d" +content-hash = "b227dc263d83a0d98c3903fc0bca5763858aaeda6f6c14f83294afddd05679b4" diff --git a/pyproject.toml b/pyproject.toml index b4c0db9..623f3a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,8 @@ flask = "^3.1.1" psycopg2 = "^2.9.10" gunicorn = "^23.0.0" gevent = "^25.5.1" +pymysql = "^1.1.1" +sqlalchemy = "^2.0.41" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/rag2_0/demo/intent_recognition_example.py b/rag2_0/demo/intent_recognition_example.py index 6c84c0e..75414ad 100644 --- a/rag2_0/demo/intent_recognition_example.py +++ b/rag2_0/demo/intent_recognition_example.py @@ -118,10 +118,65 @@ def process_query(recognizer, query): } else: # 可以在这里添加延迟,避免过快重试 - time.sleep(10 * retry_count) + time.sleep(10) + +def save_results_to_excel(results, output_file, is_final=False): + """ + 将结果保存到Excel文件 + + Args: + results: 结果列表 + output_file: 输出文件路径 + is_final: 是否为最终保存,如果是则使用完整文件名,否则添加临时标记 + + Returns: + None + """ + # 过滤掉None值 + valid_results = [r for r in results if r is not None] + + if not valid_results: + logging.warning("没有有效结果可保存") + return + + # 创建DataFrame + results_df = pd.DataFrame(valid_results) + + # 根据是否为最终保存确定文件名 + if not is_final: + file_name, file_ext = os.path.splitext(output_file) + temp_output_file = f"{file_name}_temp{file_ext}" + else: + temp_output_file = output_file + + # 使用ExcelWriter设置格式 + with pd.ExcelWriter(temp_output_file, engine='xlsxwriter') as writer: + results_df.to_excel(writer, index=False, sheet_name='Sheet1') + + # 获取工作簿和工作表对象 + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # 设置列宽(单位:像素) + # 定义列宽(厘米转为Excel单位,1cm约等于4.7个Excel单位) + worksheet.set_column('A:A', 60) # 提问列 60个Excel单位 + worksheet.set_column('B:B', 20) # 问题拆解 20个Excel单位 + worksheet.set_column('C:C', 20) # 一级分类 20个Excel单位 + worksheet.set_column('D:D', 20) # 二级分类 20个Excel单位 + worksheet.set_column('E:E', 60) # 问题改写 60个Excel单位 + worksheet.set_column('F:F', 60) # 检索到的关键词 60个Excel单位 + worksheet.set_column('G:G', 80) # 槽位填充 80个Excel单位 + + # 设置所有行高为20磅 + for i in range(len(results_df) + 1): # +1 是为了包括表头 + worksheet.set_row(i, 20) + + logging.info(f"已保存{len(valid_results)}条结果至: {temp_output_file}") # 示例查询 -examples_query = """储能软件组合件界面,点击隐藏空项目划分后界面没有任何变化""" +examples_query = """"锁标签号:811621005858, 注册单位:惠州电力勘察设计院有限公司,软件名称:广东迁改导则2022, 注册号:BW278-83834-58155-58339.迁改导则是要另外下载安装软件吗?" + +""" def main(): """ @@ -138,10 +193,10 @@ def main(): # 读取提问数据 current_dir = os.path.dirname(os.path.abspath(__file__)) - data_file = os.path.join(current_dir, "..", "..", "data", "excel", "400条提问意图分类数据-原始.xlsx") + data_file = os.path.join(current_dir, "..", "..", "data", "excel", "历史提问数据(dislike)_提问明确.xlsx") + output_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据_槽位填充结果.xlsx") # 检测是否为调试模式,调试模式下使用examples_query,否则从Excel读取 - is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None if is_debug: examples = examples_query.strip().split("\n") @@ -149,11 +204,13 @@ def main(): examples = load_questions_from_excel(data_file) if not is_debug: - - max_workers = 10 # 减少并发数以避免API限制 + max_workers = 20 # 减少并发数以避免API限制 logging.info(f"共有 {len(examples)} 个问题需要处理,使用 {max_workers} 个并发线程") + # 创建一个与输入顺序相同的结果列表 results = [None] * len(examples) + batch_size = 100 # 每100条保存一次 + # 使用线程池进行并发处理 with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: # 提交所有任务并记录它们的索引 @@ -163,43 +220,27 @@ def main(): future_to_index[future] = idx # 使用tqdm显示进度条 + completed = 0 for future in tqdm(concurrent.futures.as_completed(future_to_index), total=len(examples), desc="处理进度"): idx = future_to_index[future] result = future.result() # 将结果放在与输入相同的位置 results[idx] = result + + completed += 1 + # 每处理batch_size条数据保存一次 + if completed % batch_size == 0: + logging.info(f"已完成 {completed}/{len(examples)} 条,保存中间结果...") + save_results_to_excel(results, output_file, is_final=False) - # 将结果保存到Excel文件 - results_df = pd.DataFrame(results) - - output_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据_槽位填充结果.xlsx") - - # 使用ExcelWriter设置格式 - with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer: - results_df.to_excel(writer, index=False, sheet_name='Sheet1') - - # 获取工作簿和工作表对象 - workbook = writer.book - worksheet = writer.sheets['Sheet1'] - - # 设置列宽(单位:像素) - # 定义列宽(厘米转为Excel单位,1cm约等于4.7个Excel单位) - worksheet.set_column('A:A', 60) # 提问列 60个Excel单位 - worksheet.set_column('B:B', 20) # 问题拆解 20个Excel单位 - worksheet.set_column('C:C', 20) # 一级分类 20个Excel单位 - worksheet.set_column('D:D', 20) # 二级分类 20个Excel单位 - worksheet.set_column('E:E', 60) # 问题改写 60个Excel单位 - worksheet.set_column('F:F', 60) # 检索到的关键词 60个Excel单位 - worksheet.set_column('G:G', 80) # 槽位填充 80个Excel单位 - - # 设置所有行高为20磅 - for i in range(len(results_df) + 1): # +1 是为了包括表头 - worksheet.set_row(i, 20) + # 处理完所有数据后,保存最终结果 + save_results_to_excel(results, output_file, is_final=True) + logging.info(f"所有处理完成,最终结果已保存至: {output_file}") else: for idx, query in enumerate(examples): + if query.strip() == "": + continue process_query(recognizer, query) - - logging.info(f"处理完成,结果已保存至: {output_file}") def setup_logging(): # 配置日志输出到控制台 diff --git a/rag2_0/dify/chat_dify_by_workorder.py b/rag2_0/dify/chat_dify_by_workorder.py index 8a6d445..58bf921 100644 --- a/rag2_0/dify/chat_dify_by_workorder.py +++ b/rag2_0/dify/chat_dify_by_workorder.py @@ -1,4 +1,4 @@ -from rag2_0.dify.workflow_chat import NewWorkflowChat +from rag2_0.dify.dify_tool import NewWorkflowChat import pandas as pd from concurrent.futures import ThreadPoolExecutor from tqdm import tqdm diff --git a/rag2_0/dify/dify_tool.py b/rag2_0/dify/dify_tool.py index 2dd2130..a5b61e1 100644 --- a/rag2_0/dify/dify_tool.py +++ b/rag2_0/dify/dify_tool.py @@ -1,8 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- import psycopg2 -from psycopg2 import sql import os import json -from datetime import timezone, timedelta +from concurrent.futures import ThreadPoolExecutor, as_completed +from rag2_0.dify.dify_client import ChatClient +from pydantic import BaseModel, Field +from langchain.output_parsers import PydanticOutputParser + + +class ContentSource(BaseModel): + score: int = Field(description="相关性分数") + reason: str = Field(description="评分理由") class PgSql: """ @@ -219,6 +228,299 @@ class DifyTool: finally: dify_pgsql.close_connection() +class BaseWorkflowChat: + """ + 工作流对话基类,封装了与Dify API交互的基本功能 + """ + def __init__(self, api_key: str, base_url: str): + """ + 初始化工作流对话基类 + + Args: + api_key: Dify API的密钥 + base_url: Dify API的基础URL + """ + self.chat_client = ChatClient(api_key=api_key, base_url=base_url) + self.content_source_parser = PydanticOutputParser(pydantic_object=ContentSource) + + def create_chat_message(self, query: str): + """ + 创建聊天消息 + + Args: + query: 问题内容 + + Returns: + tuple: (聊天响应, 消息ID) + """ + try: + response = self.chat_client.create_chat_message(inputs={}, query=query, user="AutoTestDifyChat").json() + return response, response["message_id"] + except Exception as e: + raise e + + def calculate_score(self, query: str, content: str) -> int: + """ + 使用LLM判断query与content之间的相关性分数 + + Args: + query (str): 用户问题 + content (str): 检索内容 + + Returns: + int: 相关性分数,1-10分,10代表完全相关,1代表完全不相关;-1表示评分失败 + """ + from rag2_0.tool.ModelTool import OpenAiLLM + + try: + prompt = f"""你是一个专业的信息相关性评估助手。请根据以下标准对用户query和检索内容的相关性进行1-10评分(10=完全相关,1=完全不相关),并按指定格式输出JSON结果。 + +【评分标准】 +10分:完全契合,主题/意图完全一致且涵盖所有关键信息 +8-9分:高度相关,核心要素匹配但存在少量信息缺失 +6-7分:部分相关,涉及相同主题但存在重要信息缺失 +4-5分:弱相关,仅次要信息点匹配 +1-3分:完全不相关或信息冲突 + +【评估维度】 +1. 主题一致性:核心主题/意图的匹配程度 +2. 内容覆盖度:是否涵盖query的关键要素 +3. 信息准确性:是否存在矛盾/错误信息 +4. 细节丰富度:是否提供query要求的详细信息 + +【输出格式】 +{{ + "score": 评分, + "reason": "简明扼要的评分理由(中文)" +}} + +【示例】 +query: "新冠疫苗的常见副作用" +内容: "辉瑞疫苗常见反应包括注射部位疼痛(84.1%)、疲劳(62.9%)" +输出: {{"score":8,"reason":"主题完全匹配,涵盖主要副作用但未提及发热等常见反应"}} + +现在评估: +query: "{query}" +content: "{content}" +""" + api_key = os.getenv("OPENAI_API_KEY") + base_url = os.getenv("OPENAI_API_BASE") + model = os.getenv("LLM_MODEL_NAME") + llm = OpenAiLLM(api_key=api_key, base_url=base_url, model=model) + response = llm.invoke(user_prompt=prompt, need_retry=True) + + # 解析JSON响应 + try: + parsed_output = self.content_source_parser.parse(response.content) + return parsed_output.score + except Exception as e: + return -1 + except Exception as e: + return -1 + + def get_retrieve_info(self, query: str, outputs: dict) -> tuple: + """ + 获取检索信息并计算分数 + + Args: + query (str): 用户问题 + outputs (dict): 检索输出结果 + + Returns: + tuple: (检索内容列表, 最高分, 最低分, 平均分) + """ + max_score = 0 + min_score = 10 + total_score = 0 + valid_scores = 0 + retrieve_content = [] + + # 使用线程池并发计算分数 + with ThreadPoolExecutor() as executor: + # 创建任务列表 + future_to_content = {} + for result in outputs["result"]: + content = result["content"].strip() + future = executor.submit(self.calculate_score, query=query, content=content) + future_to_content[future] = content + + # 收集结果 + for future in as_completed(future_to_content): + content = future_to_content[future] + score = future.result() + content_title = content.split("\n")[0] + + if score != -1: + max_score = max(max_score, score) + min_score = min(min_score, score) + total_score += score + valid_scores += 1 + + if content_title: + retrieve_content.append(content_title + f"--相关性得分({score}分)") + + avg_score = total_score / valid_scores if valid_scores > 0 else 0 + return retrieve_content, max_score, min_score, avg_score + + +class NewWorkflowChat(BaseWorkflowChat): + """ + 新工作流对话类,用于调用新工作流发送对话并解析获取相关数据 + """ + def process_question(self, query: str) -> dict: + """ + 处理问题,获取新工作流的回答和相关信息 + + Args: + query: 问题内容 + + Returns: + dict: 包含问题、回答和相关信息的字典 + """ + response, message_id = self.create_chat_message(query) + + if isinstance(response, str) and response.startswith("error:"): + raise RuntimeError(f"create_chat_message 出错:{response}") + + answer = response["answer"] + workflow_info = self.get_workflow_info(query, message_id) + + if workflow_info is None: + return None + + result = { + "问题": query, + "新流程答案": answer, + "新问题改写": workflow_info["问题改写"], + "新问题分类": workflow_info["问题分类"], + "槽点信息": workflow_info["槽点信息"], + "新检索词条": workflow_info["检索词条"], + "检索内容": workflow_info["检索内容"], + "message_id":message_id + } + + return result + + def get_workflow_info(self, query: str, message_id: str) -> dict: + """ + 获取新工作流的问题分类和检索信息 + + Args: + query (str): 用户问题 + message_id (str): 新工作流的消息ID + + Returns: + dict: 包含问题分类结果的字典 + """ + retrieve_title = [] + retrieve_content = [] + max_score = 0 + min_score = 0 + avg_score = 0 + rewrite_query = "" + vertical_classification = "" + sub_classification = "" + slot_info = "" + + try: + message_info = DifyTool.get_message_debug_info_by_id(message_id=message_id) + for workflow_node in message_info["workflow_node_executions_info"]: + if workflow_node["title"] == "知识检索结果后处理": + outputs = json.loads(workflow_node["outputs"]) + retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) + retrieve_content = outputs["result"] + elif workflow_node["title"] == "问题优化结果解析": + outputs = json.loads(workflow_node["outputs"]) + rewrite_query = outputs["optimize_query"] + llm_result_json = json.loads(workflow_node['inputs'])["llm_result"] + json_result = json.loads(llm_result_json) + vertical_classification = json_result['vertical_classification'] + sub_classification = json_result['sub_classification'] + slot_info = json.dumps(json_result["slot_filling"], ensure_ascii=False, indent=2) + except Exception as e: + raise e + + return { + "问题改写": rewrite_query, + "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", + "检索内容": retrieve_content, + "问题分类": f"{vertical_classification} - {sub_classification}", + "槽点信息": slot_info, + + } + +class OldWorkFlowChat(BaseWorkflowChat): + """ + 旧工作流对话类,用于调用旧工作流发送对话并解析获取相关数据 + """ + def process_question(self, query: str) -> dict: + """ + 处理问题,获取旧工作流的回答和相关信息 + + Args: + query: 问题内容 + + Returns: + dict: 包含问题、回答和相关信息的字典 + """ + response, message_id = self.create_chat_message(query) + + if isinstance(response, str) and response.startswith("error:"): + return None + + answer = response["answer"] + workflow_info = self.get_workflow_info(query, message_id) + + if workflow_info is None: + return None + + result = { + "问题": query, + "旧流程答案": answer, + "旧问题改写": workflow_info["问题改写"], + "旧检索词条": workflow_info["检索词条"], + "检索内容": workflow_info["检索内容"], + "message_id":message_id + } + + return result + + def get_workflow_info(self, query: str, message_id: str) -> dict: + """ + 获取旧工作流的问题改写和检索信息 + + Args: + query (str): 用户问题 + message_id (str): 旧工作流的消息ID + + Returns: + dict: 包含问题改写和检索信息的字典 + """ + retrieve_title = [] + retrieve_content = [] + max_score = 0 + min_score = 0 + avg_score = 0 + rewrite_query = "" + + try: + message_info = DifyTool.get_message_debug_info_by_id(message_id=message_id) + for workflow_node in message_info["workflow_node_executions_info"]: + if workflow_node["title"] == "知识检索结果后处理": + outputs = json.loads(workflow_node["outputs"]) + retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) + retrieve_content = outputs["result"] + elif workflow_node["title"] == "问题优化结果解析": + outputs = json.loads(workflow_node["outputs"]) + rewrite_query = outputs["optimize_query"] + except Exception as e: + return None + + return { + "问题改写": rewrite_query, + "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", + "检索内容": retrieve_content, + } if __name__ == "__main__": try: diff --git a/rag2_0/dify/test_dify_chatapi.py b/rag2_0/dify/test_dify_chatapi.py index 3943523..f173393 100755 --- a/rag2_0/dify/test_dify_chatapi.py +++ b/rag2_0/dify/test_dify_chatapi.py @@ -2,7 +2,8 @@ # -*- coding: utf-8 -*- import os -from rag2_0.dify.dify_client import ChatClient, DifyClient +from rag2_0.dify.dify_client import DifyClient +from rag2_0.dify.dify_tool import NewWorkflowChat, OldWorkFlowChat import pandas as pd # 使用线程池并发执行 from concurrent.futures import ThreadPoolExecutor, as_completed @@ -44,8 +45,9 @@ class DifyComparisonTester: max_workers: 最大工作线程数 """ self.excel_path = excel_path - self.old_chat = ChatClient(api_key=old_workflow_api_key, base_url=baseurl) - self.new_chat = ChatClient(api_key=new_workflow_api_key, base_url=baseurl) + # 使用NewWorkflowChat和OldWorkFlowChat代替ChatClient + self.old_chat = OldWorkFlowChat(api_key=old_workflow_api_key, base_url=baseurl) + self.new_chat = NewWorkflowChat(api_key=new_workflow_api_key, base_url=baseurl) # 评判相关参数 self.output_path = output_path or os.path.join(os.path.dirname(self.excel_path), "dify问答_综合评判结果.xlsx") @@ -78,13 +80,13 @@ class DifyComparisonTester: """ def get_old_answer(): try: - return self.old_chat.create_chat_message(inputs={}, query=q, user="AutoTestDifyChat").json() + return self.old_chat.process_question(query=q) except Exception as e: return f"error: {str(e)}" def get_new_answer(): try: - return self.new_chat.create_chat_message(inputs={}, query=q, user="AutoTestDifyChat").json() + return self.new_chat.process_question(query=q) except Exception as e: return f"error: {str(e)}" @@ -95,14 +97,15 @@ class DifyComparisonTester: try: old_result = future_old.result() new_result = future_new.result() - old_message_id = old_result["message_id"] - new_message_id = new_result["message_id"] + + if isinstance(old_result, str) and old_result.startswith("error:"): + return None, None + if isinstance(new_result, str) and new_result.startswith("error:"): + return None, None - old_answer = old_result["answer"] - new_answer = new_result["answer"] except Exception as e: return None, None, None - return {"问题": q, "旧流程答案": old_answer, "新流程答案": new_answer}, old_message_id, new_message_id + return future_old, future_new def find_wiki_link(self, query) -> str | None: """ @@ -407,22 +410,24 @@ content: "{content}" Returns: dict: 包含问题分类结果的字典 """ - retrieve_title=[] - retrieve_content=[] - max_score=0 - min_score=0 - avg_score=0 - rewrite_query="" - vertical_classification="" - sub_classification="" - slot_info="" try: + # 使用DifyTool直接获取消息信息 new_message_info = DifyTool.get_message_debug_info_by_id(message_id=new_message_id) + + # 初始化变量 + retrieve_title = [] + retrieve_content = [] + rewrite_query = "" + vertical_classification = "" + sub_classification = "" + slot_info = "" + + # 解析工作流节点信息 for workflow_node in new_message_info["workflow_node_executions_info"]: if workflow_node["title"] == "知识检索结果后处理": outputs = json.loads(workflow_node["outputs"]) retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) - retrieve_content=outputs["result"] + retrieve_content = outputs["result"] elif workflow_node["title"] == "问题优化结果解析": outputs = json.loads(workflow_node["outputs"]) rewrite_query = outputs["optimize_query"] @@ -430,20 +435,21 @@ content: "{content}" json_result = json.loads(llm_result_json) vertical_classification = json_result['vertical_classification'] sub_classification = json_result['sub_classification'] - slot_info=json.dumps(json_result["slot_filling"],ensure_ascii=False,indent=2) + slot_info = json.dumps(json_result["slot_filling"], ensure_ascii=False, indent=2) except Exception as e: return None + return { "问题改写": rewrite_query, "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", "检索内容": retrieve_content, "问题分类": f"{vertical_classification} - {sub_classification}", - "槽点信息":slot_info + "槽点信息": slot_info } def get_old_workflow_info(self, query:str, old_message_id:str) -> dict: """ - 获取新流程的问题分类 + 获取旧流程的问题分类 Args: query (str): 用户问题 @@ -452,24 +458,27 @@ content: "{content}" Returns: dict: 包含问题分类结果的字典 """ - retrieve_title=[] - retrieve_content=[] - max_score=0 - min_score=0 - avg_score=0 - rewrite_query="" try: + # 使用DifyTool直接获取消息信息 old_message_info = DifyTool.get_message_debug_info_by_id(message_id=old_message_id) + + # 初始化变量 + retrieve_title = [] + retrieve_content = [] + rewrite_query = "" + + # 解析工作流节点信息 for workflow_node in old_message_info["workflow_node_executions_info"]: if workflow_node["title"] == "知识检索结果后处理": outputs = json.loads(workflow_node["outputs"]) retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) - retrieve_content=outputs["result"] + retrieve_content = outputs["result"] elif workflow_node["title"] == "问题优化结果解析": outputs = json.loads(workflow_node["outputs"]) rewrite_query = outputs["optimize_query"] except Exception as e: return None + return { "问题改写": rewrite_query, "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", @@ -512,13 +521,13 @@ content: "{content}" dict: 包含问题、回答和评判结果的字典 """ # 获取基本的问题和回答 - basic_result, old_message_id, new_message_id = self.process_question(q) - if basic_result is None: + future_old, future_new = self.process_question(q) + if future_old is None or future_new is None: return None - query = basic_result["问题"] - old_answer = basic_result["旧流程答案"] - new_answer = basic_result["新流程答案"] + query = future_old["问题"] + old_answer = future_old["旧流程答案"] + new_answer = future_new["新流程答案"] # 获取词条链接和标准答案 wiki_url = self.find_wiki_link(query) @@ -540,33 +549,23 @@ content: "{content}" if judge_result is None: judge_result = "" + + # retrieve_title_score = self.get_retrieve_title_similarity(old_retrieve_content=old_workflow_info["检索内容"], new_retrieve_content=new_workflow_info["检索内容"]) - # 并行获取新旧流程信息 - with ThreadPoolExecutor(max_workers=2) as executor: - future_new = executor.submit(self.get_new_workflow_info, query=query, new_message_id=new_message_id) - future_old = executor.submit(self.get_old_workflow_info, query=query, old_message_id=old_message_id) - - try: - new_workflow_info = future_new.result() - old_workflow_info = future_old.result() - except Exception as e: - print(f"处理问题 '{query}' 获取工作流信息时发生错误: {str(e)}") - return None - retrieve_title_score=self.get_retrieve_title_similarity(old_retrieve_content=old_workflow_info["检索内容"], new_retrieve_content=new_workflow_info["检索内容"]) # 返回结果 return { "问题": query, - "新问题改写": new_workflow_info["问题改写"], - "旧问题改写": old_workflow_info["问题改写"], - "新问题分类": new_workflow_info["问题分类"], - "槽点信息":new_workflow_info["槽点信息"], + "新问题改写": future_new["问题改写"], + "旧问题改写": future_old["问题改写"], + "新问题分类": future_new["问题分类"], + "槽点信息": future_new["槽点信息"], "新流程答案": new_answer, "旧流程答案": old_answer, "回答判断": judge_result, - "词条检索相似度": retrieve_title_score, + # "词条检索相似度": retrieve_title_score, "答案词条": answer_title if answer_title else "", - "新检索词条": new_workflow_info["检索词条"], - "旧检索词条": old_workflow_info["检索词条"], + "新检索词条": future_new["检索词条"], + "旧检索词条": future_old["检索词条"], } def run_comparison(self, with_judge=False): @@ -670,5 +669,7 @@ if __name__ == "__main__": print(f"对比评判结果已保存至: {output_file}") # 单个问题测试示例 -# c = DifyChat(baseurl="http://172.20.0.145/v1", api_key="app-LjJaeLoAfqa6aoGzqU9UvxSf") -# c.chat("如何新建配电线路工程") +# 使用新的工作流类进行测试 +# new_chat = NewWorkflowChat(api_key="app-qxsSybCs7ABiKlC1JabTYVn6", base_url="http://172.20.0.145/v1") +# result = new_chat.process_question("如何新建配电线路工程") +# print(json.dumps(result, ensure_ascii=False, indent=2)) diff --git a/rag2_0/dify/workflow_chat.py b/rag2_0/dify/workflow_chat.py deleted file mode 100644 index fce903a..0000000 --- a/rag2_0/dify/workflow_chat.py +++ /dev/null @@ -1,310 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import json -from concurrent.futures import ThreadPoolExecutor, as_completed -from rag2_0.dify.dify_client import ChatClient, DifyClient -from rag2_0.dify.dify_tool import DifyTool -from pydantic import BaseModel, Field -from langchain.output_parsers import PydanticOutputParser -from threading import Lock - -class ContentSource(BaseModel): - score: int = Field(description="相关性分数") - reason: str = Field(description="评分理由") - -class BaseWorkflowChat: - """ - 工作流对话基类,封装了与Dify API交互的基本功能 - """ - def __init__(self, api_key: str, base_url: str): - """ - 初始化工作流对话基类 - - Args: - api_key: Dify API的密钥 - base_url: Dify API的基础URL - """ - self.chat_client = ChatClient(api_key=api_key, base_url=base_url) - self.content_source_parser = PydanticOutputParser(pydantic_object=ContentSource) - - def create_chat_message(self, query: str): - """ - 创建聊天消息 - - Args: - query: 问题内容 - - Returns: - tuple: (聊天响应, 消息ID) - """ - try: - response = self.chat_client.create_chat_message(inputs={}, query=query, user="AutoTestDifyChat").json() - return response, response["message_id"] - except Exception as e: - raise e - - def calculate_score(self, query: str, content: str) -> int: - """ - 使用LLM判断query与content之间的相关性分数 - - Args: - query (str): 用户问题 - content (str): 检索内容 - - Returns: - int: 相关性分数,1-10分,10代表完全相关,1代表完全不相关;-1表示评分失败 - """ - from rag2_0.tool.ModelTool import OpenAiLLM - - try: - prompt = f"""你是一个专业的信息相关性评估助手。请根据以下标准对用户query和检索内容的相关性进行1-10评分(10=完全相关,1=完全不相关),并按指定格式输出JSON结果。 - -【评分标准】 -10分:完全契合,主题/意图完全一致且涵盖所有关键信息 -8-9分:高度相关,核心要素匹配但存在少量信息缺失 -6-7分:部分相关,涉及相同主题但存在重要信息缺失 -4-5分:弱相关,仅次要信息点匹配 -1-3分:完全不相关或信息冲突 - -【评估维度】 -1. 主题一致性:核心主题/意图的匹配程度 -2. 内容覆盖度:是否涵盖query的关键要素 -3. 信息准确性:是否存在矛盾/错误信息 -4. 细节丰富度:是否提供query要求的详细信息 - -【输出格式】 -{{ - "score": 评分, - "reason": "简明扼要的评分理由(中文)" -}} - -【示例】 -query: "新冠疫苗的常见副作用" -内容: "辉瑞疫苗常见反应包括注射部位疼痛(84.1%)、疲劳(62.9%)" -输出: {{"score":8,"reason":"主题完全匹配,涵盖主要副作用但未提及发热等常见反应"}} - -现在评估: -query: "{query}" -content: "{content}" -""" - api_key = os.getenv("OPENAI_API_KEY") - base_url = os.getenv("OPENAI_API_BASE") - model = os.getenv("LLM_MODEL_NAME") - llm = OpenAiLLM(api_key=api_key, base_url=base_url, model=model) - response = llm.invoke(user_prompt=prompt, need_retry=True) - - # 解析JSON响应 - try: - parsed_output = self.content_source_parser.parse(response.content) - return parsed_output.score - except Exception as e: - return -1 - except Exception as e: - return -1 - - def get_retrieve_info(self, query: str, outputs: dict) -> tuple: - """ - 获取检索信息并计算分数 - - Args: - query (str): 用户问题 - outputs (dict): 检索输出结果 - - Returns: - tuple: (检索内容列表, 最高分, 最低分, 平均分) - """ - max_score = 0 - min_score = 10 - total_score = 0 - valid_scores = 0 - retrieve_content = [] - - # 使用线程池并发计算分数 - with ThreadPoolExecutor() as executor: - # 创建任务列表 - future_to_content = {} - for result in outputs["result"]: - content = result["content"].strip() - future = executor.submit(self.calculate_score, query=query, content=content) - future_to_content[future] = content - - # 收集结果 - for future in as_completed(future_to_content): - content = future_to_content[future] - score = future.result() - content_title = content.split("\n")[0] - - if score != -1: - max_score = max(max_score, score) - min_score = min(min_score, score) - total_score += score - valid_scores += 1 - - if content_title: - retrieve_content.append(content_title + f"--相关性得分({score}分)") - - avg_score = total_score / valid_scores if valid_scores > 0 else 0 - return retrieve_content, max_score, min_score, avg_score - - -class NewWorkflowChat(BaseWorkflowChat): - """ - 新工作流对话类,用于调用新工作流发送对话并解析获取相关数据 - """ - def process_question(self, query: str) -> dict: - """ - 处理问题,获取新工作流的回答和相关信息 - - Args: - query: 问题内容 - - Returns: - dict: 包含问题、回答和相关信息的字典 - """ - response, message_id = self.create_chat_message(query) - - if isinstance(response, str) and response.startswith("error:"): - raise RuntimeError(f"create_chat_message 出错:{response}") - - answer = response["answer"] - workflow_info = self.get_workflow_info(query, message_id) - - if workflow_info is None: - return None - - result = { - "问题": query, - "新流程答案": answer, - "新问题改写": workflow_info["问题改写"], - "新问题分类": workflow_info["问题分类"], - "槽点信息": workflow_info["槽点信息"], - "新检索词条": workflow_info["检索词条"], - "检索内容": workflow_info["检索内容"], - "message_id":message_id - } - - return result - - def get_workflow_info(self, query: str, message_id: str) -> dict: - """ - 获取新工作流的问题分类和检索信息 - - Args: - query (str): 用户问题 - message_id (str): 新工作流的消息ID - - Returns: - dict: 包含问题分类结果的字典 - """ - retrieve_title = [] - retrieve_content = [] - max_score = 0 - min_score = 0 - avg_score = 0 - rewrite_query = "" - vertical_classification = "" - sub_classification = "" - slot_info = "" - - try: - message_info = DifyTool.get_message_debug_info_by_id(message_id=message_id) - for workflow_node in message_info["workflow_node_executions_info"]: - if workflow_node["title"] == "知识检索结果后处理": - outputs = json.loads(workflow_node["outputs"]) - retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) - retrieve_content = outputs["result"] - elif workflow_node["title"] == "问题优化结果解析": - outputs = json.loads(workflow_node["outputs"]) - rewrite_query = outputs["optimize_query"] - llm_result_json = json.loads(workflow_node['inputs'])["llm_result"] - json_result = json.loads(llm_result_json) - vertical_classification = json_result['vertical_classification'] - sub_classification = json_result['sub_classification'] - slot_info = json.dumps(json_result["slot_filling"], ensure_ascii=False, indent=2) - except Exception as e: - raise e - - return { - "问题改写": rewrite_query, - "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", - "检索内容": retrieve_content, - "问题分类": f"{vertical_classification} - {sub_classification}", - "槽点信息": slot_info, - - } - - -class OldWorkFlowChat(BaseWorkflowChat): - """ - 旧工作流对话类,用于调用旧工作流发送对话并解析获取相关数据 - """ - def process_question(self, query: str) -> dict: - """ - 处理问题,获取旧工作流的回答和相关信息 - - Args: - query: 问题内容 - - Returns: - dict: 包含问题、回答和相关信息的字典 - """ - response, message_id = self.create_chat_message(query) - - if isinstance(response, str) and response.startswith("error:"): - return None - - answer = response["answer"] - workflow_info = self.get_workflow_info(query, message_id) - - if workflow_info is None: - return None - - result = { - "问题": query, - "旧流程答案": answer, - "旧问题改写": workflow_info["问题改写"], - "旧检索词条": workflow_info["检索词条"], - "检索内容": workflow_info["检索内容"], - "message_id":message_id - } - - return result - - def get_workflow_info(self, query: str, message_id: str) -> dict: - """ - 获取旧工作流的问题改写和检索信息 - - Args: - query (str): 用户问题 - message_id (str): 旧工作流的消息ID - - Returns: - dict: 包含问题改写和检索信息的字典 - """ - retrieve_title = [] - retrieve_content = [] - max_score = 0 - min_score = 0 - avg_score = 0 - rewrite_query = "" - - try: - message_info = DifyTool.get_message_debug_info_by_id(message_id=message_id) - for workflow_node in message_info["workflow_node_executions_info"]: - if workflow_node["title"] == "知识检索结果后处理": - outputs = json.loads(workflow_node["outputs"]) - retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs) - retrieve_content = outputs["result"] - elif workflow_node["title"] == "问题优化结果解析": - outputs = json.loads(workflow_node["outputs"]) - rewrite_query = outputs["optimize_query"] - except Exception as e: - return None - - return { - "问题改写": rewrite_query, - "检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库", - "检索内容": retrieve_content, - } \ No newline at end of file diff --git a/rag2_0/intent_recognition/DataModels.py b/rag2_0/intent_recognition/DataModels.py index 4fe8040..8ef1834 100644 --- a/rag2_0/intent_recognition/DataModels.py +++ b/rag2_0/intent_recognition/DataModels.py @@ -23,12 +23,12 @@ class SoftwareName(str, Enum): # 软件别名映射 ALIASES = { - D3: ["配网D3", "D3软件", "配网工程软件"], - C1: ["储能C1", "C1软件", "储能电站软件", "储能软件"], - Z1: ["西藏Z1", "Z1软件", "西藏电力软件"], - T1: ["技改T1", "T1软件", "技改检修软件"], - T1_LIST: ["技改清单T1", "T1清单软件", "技改检修清单软件"], - MAIN: ["主网软件", "电力建设软件", "主网建设软件", "主网软件"] + D3: "别名包括:配网D3、D3软件、配网工程软件等 其他类似称呼", + C1: "别名包括:储能C1、C1软件、储能电站软件、储能软件等 其他类似称呼", + Z1: "别名包括:西藏Z1、Z1软件、西藏电力软件等 其他类似称呼", + T1: "别名包括:技改T1、T1软件、技改检修软件等 其他类似称呼", + T1_LIST: "别名包括:技改清单T1、T1清单软件、技改检修清单软件等 其他类似称呼", + MAIN: "别名包括:主网软件、电力建设软件、主网建设软件、博微电力建设计价通等 其他类似称呼" } # 定义输出模型 @@ -58,7 +58,7 @@ class QueryRewrite(BaseModel): # 1. 软件问题 # 1.1 软件功能 class SoftwareFunction(BaseModel): - software_name: SoftwareName = Field(description="软件名称") + software_name: SoftwareName = Field(description="软件名称,只能从给定的范围中取值") function_name: str = Field(description="具体功能名称") operation: str = Field(description="用户操作意图(如何使用功能、功能入口、功能使用场景)") software_version: Optional[str] = Field(None, description="软件版本") @@ -77,7 +77,7 @@ class SoftwareFunction(BaseModel): # 1.2 故障排查 class TroubleShooting(BaseModel): - software_name: SoftwareName = Field(description="软件名称") + software_name: SoftwareName = Field(description="软件名称,只能从给定的范围中取值") function_name: str = Field(description="具体功能名称/操作描述") error_message: str = Field(description="报错信息/异常现象") software_version: Optional[str] = Field(None, description="软件版本") @@ -162,7 +162,7 @@ class SoftwareLock(BaseModel): # 3.3 安装下载类 class InstallationDownload(BaseModel): - software_name: SoftwareName = Field(description="软件/插件名称,与file_name二选一") + software_name: str = Field(description="软件/插件名称,与file_name二选一") file_name: str = Field(description="文件名,与software_name二选一") operation_stage: str = Field(description="操作阶段") os_version: Optional[str] = Field(None, description="操作系统版本") @@ -182,7 +182,7 @@ class InstallationDownload(BaseModel): # 3.4 问题排查类 class ProblemDiagnosis(BaseModel): error_message: str = Field(description="报错信息/异常现象") - software_name: Optional[SoftwareName] = Field(None, description="软件名称") + software_name: Optional[SoftwareName] = Field(None, description="软件名称,只能从给定的范围中取值") os_version: Optional[str] = Field(None, description="操作系统版本") def check_required_slots(self) -> Tuple[bool, Dict[str, str]]: diff --git a/rag2_0/intent_recognition/IntentRecognition.py b/rag2_0/intent_recognition/IntentRecognition.py index d1df74c..df66f74 100644 --- a/rag2_0/intent_recognition/IntentRecognition.py +++ b/rag2_0/intent_recognition/IntentRecognition.py @@ -180,7 +180,7 @@ class IntentRecognizer: return reranked_terms except Exception as e: - return list(matched_terms) + raise RuntimeError(f"SiliconFlowReRankerModel重排失败:{e}") from e def match_keywords(self, query: str) -> Tuple[TermList, List[str]]: """ diff --git a/rag2_0/tool/APIKeyManager.py b/rag2_0/tool/APIKeyManager.py index 3489e33..e6035d0 100644 --- a/rag2_0/tool/APIKeyManager.py +++ b/rag2_0/tool/APIKeyManager.py @@ -18,26 +18,6 @@ import requests # sk-dvbaktabkdwdpjgxyoozlwnejosjyhdgqwllfeborqahndxs API_KEY_LIST=[ -"sk-hrojkkkrrkmsajtnizokbcgexsfggdiqavbtvbayuwqbnmom", -"sk-kkdklmnyompoiotzkfqahpayzlkgogfudjkyaebehtsowvid", -"sk-sfxzvllifafbyfduupcdtcrjwhdyiyojnksyopnfslurnhsp", -"sk-faqirxiszukfswqvzqawxnemqfacrkyurbxxkzwbbujqacdp", -"sk-vonaanuueqiczppkntjuphateshrcpqpnvxmwxorkyihjmrb", -"sk-qfpeoodgupcukcdstjcxgegwxnuhtxkkrupkogkcvhavxgny", -"sk-fsvjnbpfgoadixympaabaukupuhjvbturcbxaqfdzjznemtr", -"sk-fltvnbiqntfawjwkfnnhmyfiimzgzxkweqmefcfqkbucwrhi", -"sk-oosswdriwyqkglwdigvcxgmcpyplcyowicbaugpizoscevdl", -"sk-jswtxhkiralnyiukqimtyuurcaepulxdrfijadtxzrgsajyc", -"sk-dcjuhoukdyrbneadtxtnyxzmigkpiqgtqqnreiprxpioftsv", -"sk-yrhezyuxjblpaxzzudbowqmvcoxcammupcubghbodolikbdk", -"sk-dsgvwpfagmarilmnewwbzhfzlqehburoupjaopucdvybpbdo", -"sk-oljjlspuaurtoczyekztiidwtoerugadgepiufclpmrbdfqc", -"sk-crgrimubjesthvxuqwedqqdoetljyrgeahxxpctfefgnkpyo", -"sk-tubqhwgycxrdhwsqzjopxgeaqpsjdfppckckayvzornaluwq", -"sk-amcxlmsdnadptpnehqnkvseolacipztmvovnmxojzohbjjil", -"sk-pdyymhshpzmdduwxsezthnrgarnnhgzvmiflbpisfzxkiayt", -"sk-qhwoorywmejumyudfxbrkegxtqifsbgcdkmpjckezepgyqnz", -"sk-cpoctrgcnstaybeyuieuwjdgeakudhqdnnwdjavjudcbvvem", "sk-wqdpapdkisovziexgcyxvumpwzbjnhqbxvcqcspzctjhyhjk", "sk-bbntrnifrtdzhhgrtlrhvwbnaysuszviemshdakxonnnymnb", "sk-vmpnwjxersrwybmfhfxgsvbmhsmpjldxseiyxovnysrlbuzi", @@ -98,6 +78,25 @@ API_KEY_LIST=[ "sk-nbksjgcngsayoumnsdbkcpnqivnvxjenwpzuazzrkhnsgeoo", "sk-iaafvpjyqiocgzchbdldbkgcffqniahkcbgoviuevuogulcm", "sk-muvjguqeshyimzowqnqgxwpsgujlpkqgrisxsimthtyrpypx", +"sk-jgybgyayxlwoxeijgrjcneqlyusleohgbliuwpsuhocrjsmk", +"sk-wzjsmwxcbbpcrqivqfzjwufqqjtlwejtncnvbpeicznkwiuh", +"sk-izdjicdoyillktsihkiapuvwebisehtlgykozrvzfkgncwsc", +"sk-fcsfmyivfuojsqsditvobfqprdpeunukycpcfnoxkraqevpx", +"sk-szyjgyxrcvyxpvzfwgmbxnflxngxvcplitcctsdvvrqjgftk", +"sk-jzbodthsnvjwbyrnynsxrudtqfnbdbrcxebjwjgajocnzqse", +"sk-fxepossfzpmccibfwqpkluorzqlbtcaplepeugtfzfsctcbl", +"sk-ympnflocrkxjrbubsxqdjqwicuyavvvysctlpfhunkcrzxjx", +"sk-flhqvziknntednkcgjaxlyzzsrfzjhrzrmteqonajpbiinni", +"sk-xfregpbbquqbxpiobjzanydsjivrjrnbokzxcqtnhxhyghhe", +"sk-jrdzerhmvrtvzawkksowbgkggkubwfquplmrxbdhespqgtis", +"sk-jjbpnkbeupsxyclcivbhizcfpfjrppddunbqynyjkqhtmpwu", +"sk-oqehupcveovkjqqtxypqyifidcdissuyehwrkdwgruoyjkpq", +"sk-orhfntzrbpmpavybcjyylofxncdvufdmvlznofmhxmnjymjl", +"sk-kvgfuqeqvpmfsccykyoohheshclcrtvjlnewratvrjpkpbkc", +"sk-zhnbqnpuumuuvegnvbgoggxafpukbzchpgrugpkobiwkzsar", +"sk-kzhxlqvqcxlnbdgnpalqnzumkmspepkttkgbophnkqanainw", +"sk-bzttugqtlskrvguvhckwamdssvgmgnrqpsialpdbskfsyyak", +"sk-tovmogiablsoeabwgqyvevpcfichyjpuzqdymmvksspdrtqt", ] class APIKeyManager: