Initial commit from Create Llama

2024-08-08 18:33:08 +08:00
commit 4923337038
97 changed files with 5378 additions and 0 deletions
@@ -0,0 +1,56 @@
+import os
+import yaml
+import json
+import importlib
+from cachetools import cached, LRUCache
+from llama_index.core.tools.tool_spec.base import BaseToolSpec
+from llama_index.core.tools.function_tool import FunctionTool
+
+
+class ToolType:
+    LLAMAHUB = "llamahub"
+    LOCAL = "local"
+
+
+class ToolFactory:
+
+    TOOL_SOURCE_PACKAGE_MAP = {
+        ToolType.LLAMAHUB: "llama_index.tools",
+        ToolType.LOCAL: "app.engine.tools",
+    }
+
+    def load_tools(tool_type: str, tool_name: str, config: dict) -> list[FunctionTool]:
+        source_package = ToolFactory.TOOL_SOURCE_PACKAGE_MAP[tool_type]
+        try:
+            if "ToolSpec" in tool_name:
+                tool_package, tool_cls_name = tool_name.split(".")
+                module_name = f"{source_package}.{tool_package}"
+                module = importlib.import_module(module_name)
+                tool_class = getattr(module, tool_cls_name)
+                tool_spec: BaseToolSpec = tool_class(**config)
+                return tool_spec.to_tool_list()
+            else:
+                module = importlib.import_module(f"{source_package}.{tool_name}")
+                tools = module.get_tools(**config)
+                if not all(isinstance(tool, FunctionTool) for tool in tools):
+                    raise ValueError(
+                        f"The module {module} does not contain valid tools"
+                    )
+                return tools
+        except ImportError as e:
+            raise ValueError(f"Failed to import tool {tool_name}: {e}")
+        except AttributeError as e:
+            raise ValueError(f"Failed to load tool {tool_name}: {e}")
+
+    @staticmethod
+    def from_env() -> list[FunctionTool]:
+        tools = []
+        if os.path.exists("config/tools.yaml"):
+            with open("config/tools.yaml", "r") as f:
+                tool_configs = yaml.safe_load(f)
+                for tool_type, config_entries in tool_configs.items():
+                    for tool_name, config in config_entries.items():
+                        tools.extend(
+                            ToolFactory.load_tools(tool_type, tool_name, config)
+                        )
+        return tools
@@ -0,0 +1,36 @@
+from llama_index.core.tools.function_tool import FunctionTool
+
+
+def duckduckgo_search(
+    query: str,
+    region: str = "wt-wt",
+    max_results: int = 10,
+):
+    """
+    Use this function to search for any query in DuckDuckGo.
+    Args:
+        query (str): The query to search in DuckDuckGo.
+        region Optional(str): The region to be used for the search in [country-language] convention, ex us-en, uk-en, ru-ru, etc...
+        max_results Optional(int): The maximum number of results to be returned. Default is 10.
+    """
+    try:
+        from duckduckgo_search import DDGS
+    except ImportError:
+        raise ImportError(
+            "duckduckgo_search package is required to use this function."
+            "Please install it by running: `poetry add duckduckgo_search` or `pip install duckduckgo_search`"
+        )
+
+    params = {
+        "keywords": query,
+        "region": region,
+        "max_results": max_results,
+    }
+    results = []
+    with DDGS() as ddg:
+        results = list(ddg.text(**params))
+    return results
+
+
+def get_tools(**kwargs):
+    return [FunctionTool.from_defaults(duckduckgo_search)]
@@ -0,0 +1,108 @@
+import os
+import uuid
+import logging
+import requests
+from typing import Optional
+from pydantic import BaseModel, Field
+from llama_index.core.tools import FunctionTool
+
+logger = logging.getLogger(__name__)
+
+
+class ImageGeneratorToolOutput(BaseModel):
+    is_success: bool = Field(
+        ...,
+        description="Whether the image generation was successful.",
+    )
+    image_url: Optional[str] = Field(
+        None,
+        description="The URL of the generated image.",
+    )
+    error_message: Optional[str] = Field(
+        None,
+        description="The error message if the image generation failed.",
+    )
+
+
+class ImageGeneratorTool:
+    _IMG_OUTPUT_FORMAT = "webp"
+    _IMG_OUTPUT_DIR = "output/tool"
+    _IMG_GEN_API = "https://api.stability.ai/v2beta/stable-image/generate/core"
+
+    def __init__(self, api_key: str = None):
+        if not api_key:
+            api_key = os.getenv("STABILITY_API_KEY")
+        self._api_key = api_key
+        self.fileserver_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
+        if self._api_key is None:
+            raise ValueError(
+                "STABILITY_API_KEY key is required to run image generator. Get it here: https://platform.stability.ai/account/keys"
+            )
+        if self.fileserver_url_prefix is None:
+            raise ValueError("FILESERVER_URL_PREFIX is required.")
+
+    def _prepare_output_dir(self):
+        """
+        Create the output directory if it doesn't exist
+        """
+        if not os.path.exists(self._IMG_OUTPUT_DIR):
+            os.makedirs(self._IMG_OUTPUT_DIR, exist_ok=True)
+
+    def _save_image(self, image_data: bytes):
+        self._prepare_output_dir()
+        filename = f"{uuid.uuid4()}.{self._IMG_OUTPUT_FORMAT}"
+        output_path = os.path.join(self._IMG_OUTPUT_DIR, filename)
+        with open(output_path, "wb") as f:
+            f.write(image_data)
+        url = f"{os.getenv('FILESERVER_URL_PREFIX')}/{self._IMG_OUTPUT_DIR}/{filename}"
+        logger.info(f"Saved image to {output_path}.\nURL: {url}")
+        return url
+
+    def _call_stability_api(self, prompt: str):
+        headers = {
+            "authorization": f"Bearer {self._api_key}",
+            "accept": "image/*",
+        }
+        data = {
+            "prompt": prompt,
+            "output_format": self._IMG_OUTPUT_FORMAT,
+        }
+
+        response = requests.post(
+            self._IMG_GEN_API,
+            headers=headers,
+            files={"none": ""},
+            data=data,
+        )
+        response.raise_for_status()
+
+        return response
+
+    def generate_image(self, prompt: str) -> ImageGeneratorToolOutput:
+        """
+        Use this tool to generate an image based on the prompt.
+        Args:
+            prompt (str): The prompt to generate the image from.
+        """
+
+        try:
+            # Call the Stability API
+            response = self._call_stability_api(prompt)
+
+            # Save the image and get the URL
+            image_url = self._save_image(response.content)
+
+            return ImageGeneratorToolOutput(
+                is_success=True,
+                image_url=image_url,
+            )
+        except Exception as e:
+            logger.exception(e, exc_info=True)
+            return ImageGeneratorToolOutput(
+                is_success=False,
+                error_message=str(e),
+            )
+
+
+def get_tools(**kwargs):
+    return [FunctionTool.from_defaults(ImageGeneratorTool(**kwargs).generate_image)]
@@ -0,0 +1,143 @@
+import os
+import logging
+import base64
+import uuid
+from pydantic import BaseModel
+from typing import List, Tuple, Dict, Optional
+from llama_index.core.tools import FunctionTool
+from e2b_code_interpreter import CodeInterpreter
+from e2b_code_interpreter.models import Logs
+
+
+logger = logging.getLogger(__name__)
+
+
+class InterpreterExtraResult(BaseModel):
+    # One parsed output item of an executed cell.
+    # `type` is the output format name; items saved to disk carry `filename` and `url`,
+    # all other items carry the raw data in `content`.
+    type: str
+    content: Optional[str] = None
+    filename: Optional[str] = None
+    url: Optional[str] = None
+
+
+class E2BToolOutput(BaseModel):
+    # Value returned by the code interpreter tool.
+    # `is_error` is set when the executed cell reported an error, `logs` holds the logs of
+    # the execution, and `results` holds the parsed output items (empty on error).
+    is_error: bool
+    logs: Logs
+    results: List[InterpreterExtraResult] = []
+
+
+class E2BCodeInterpreter:
+
+    output_dir = "output/tool"
+
+    def __init__(self, api_key: str = None):
+        if api_key is None:
+            api_key = os.getenv("E2B_API_KEY")
+        filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
+        if not api_key:
+            raise ValueError(
+                "E2B_API_KEY key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key"
+            )
+        if not filesever_url_prefix:
+            raise ValueError(
+                "FILESERVER_URL_PREFIX is required to display file output from sandbox"
+            )
+
+        self.filesever_url_prefix = filesever_url_prefix
+        self.interpreter = CodeInterpreter(api_key=api_key)
+
+    def __del__(self):
+        self.interpreter.close()
+
+    def get_output_path(self, filename: str) -> str:
+        # if output directory doesn't exist, create it
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir, exist_ok=True)
+        return os.path.join(self.output_dir, filename)
+
+    def save_to_disk(self, base64_data: str, ext: str) -> Dict:
+        filename = f"{uuid.uuid4()}.{ext}"  # generate a unique filename
+        buffer = base64.b64decode(base64_data)
+        output_path = self.get_output_path(filename)
+
+        try:
+            with open(output_path, "wb") as file:
+                file.write(buffer)
+        except IOError as e:
+            logger.error(f"Failed to write to file {output_path}: {str(e)}")
+            raise e
+
+        logger.info(f"Saved file to {output_path}")
+
+        return {
+            "outputPath": output_path,
+            "filename": filename,
+        }
+
+    def get_file_url(self, filename: str) -> str:
+        return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}"
+
+    def parse_result(self, result) -> List[InterpreterExtraResult]:
+        """
+        The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
+        We save each result to disk and return saved file metadata (extension, filename, url)
+        """
+        if not result:
+            return []
+
+        output = []
+
+        try:
+            formats = result.formats()
+            results = [result[format] for format in formats]
+
+            for ext, data in zip(formats, results):
+                match ext:
+                    case "png" | "svg" | "jpeg" | "pdf":
+                        result = self.save_to_disk(data, ext)
+                        filename = result["filename"]
+                        output.append(
+                            InterpreterExtraResult(
+                                type=ext,
+                                filename=filename,
+                                url=self.get_file_url(filename),
+                            )
+                        )
+                    case _:
+                        output.append(
+                            InterpreterExtraResult(
+                                type=ext,
+                                content=data,
+                            )
+                        )
+        except Exception as error:
+            logger.exception(error, exc_info=True)
+            logger.error("Error when parsing output from E2b interpreter tool", error)
+
+        return output
+
+    def interpret(self, code: str) -> E2BToolOutput:
+        """
+        Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error.
+
+        Parameters:
+            code (str): The python code to be executed in a single cell.
+        """
+        logger.info(
+            f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
+        )
+        exec = self.interpreter.notebook.exec_cell(code)
+
+        if exec.error:
+            logger.error("Error when executing code", exec.error)
+            output = E2BToolOutput(is_error=True, logs=exec.logs, results=[])
+        else:
+            if len(exec.results) == 0:
+                output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
+            else:
+                results = self.parse_result(exec.results[0])
+                output = E2BToolOutput(is_error=False, logs=exec.logs, results=results)
+        return output
+
+
+def get_tools(**kwargs):
+    return [FunctionTool.from_defaults(E2BCodeInterpreter(**kwargs).interpret)]
@@ -0,0 +1,78 @@
+from typing import Dict, List, Tuple
+from llama_index.tools.openapi import OpenAPIToolSpec
+from llama_index.tools.requests import RequestsToolSpec
+
+
+class OpenAPIActionToolSpec(OpenAPIToolSpec, RequestsToolSpec):
+    """
+    A combination of OpenAPI and Requests tool specs that can parse OpenAPI specs and make requests.
+
+    openapi_uri: str: The file path or URL to the OpenAPI spec.
+    domain_headers: dict: Whitelist domains and the headers to use.
+    """
+
+    spec_functions = OpenAPIToolSpec.spec_functions + RequestsToolSpec.spec_functions
+    # Cached parsed specs by URI
+    _specs: Dict[str, Tuple[Dict, List[str]]] = {}
+
+    def __init__(self, openapi_uri: str, domain_headers: dict = None, **kwargs):
+        if domain_headers is None:
+            domain_headers = {}
+        if openapi_uri not in self._specs:
+            openapi_spec, servers = self._load_openapi_spec(openapi_uri)
+            self._specs[openapi_uri] = (openapi_spec, servers)
+        else:
+            openapi_spec, servers = self._specs[openapi_uri]
+
+        # Add the servers to the domain headers if they are not already present
+        for server in servers:
+            if server not in domain_headers:
+                domain_headers[server] = {}
+
+        OpenAPIToolSpec.__init__(self, spec=openapi_spec)
+        RequestsToolSpec.__init__(self, domain_headers)
+
+    @staticmethod
+    def _load_openapi_spec(uri: str) -> Tuple[Dict, List[str]]:
+        """
+        Load an OpenAPI spec from a URI.
+
+        Args:
+            uri (str): A file path or URL to the OpenAPI spec.
+
+        Returns:
+            List[Document]: A list of Document objects.
+        """
+        import yaml
+        from urllib.parse import urlparse
+
+        if uri.startswith("http"):
+            import requests
+
+            response = requests.get(uri)
+            if response.status_code != 200:
+                raise ValueError(
+                    "Could not initialize OpenAPIActionToolSpec: "
+                    f"Failed to load OpenAPI spec from {uri}, status code: {response.status_code}"
+                )
+            spec = yaml.safe_load(response.text)
+        elif uri.startswith("file"):
+            filepath = urlparse(uri).path
+            with open(filepath, "r") as file:
+                spec = yaml.safe_load(file)
+        else:
+            raise ValueError(
+                "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI URI provided. "
+                "Only HTTP and file path are supported."
+            )
+        # Add the servers to the whitelist
+        try:
+            servers = [
+                urlparse(server["url"]).netloc for server in spec.get("servers", [])
+            ]
+        except KeyError as e:
+            raise ValueError(
+                "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI spec provided. "
+                "Could not get `servers` from the spec."
+            ) from e
+        return spec, servers
@@ -0,0 +1,73 @@
+"""Open Meteo weather map tool spec."""
+
+import logging
+import requests
+import pytz
+from llama_index.core.tools import FunctionTool
+
+logger = logging.getLogger(__name__)
+
+
+class OpenMeteoWeather:
+    geo_api = "https://geocoding-api.open-meteo.com/v1"
+    weather_api = "https://api.open-meteo.com/v1"
+
+    @classmethod
+    def _get_geo_location(cls, location: str) -> dict:
+        """Get geo location from location name."""
+        params = {"name": location, "count": 10, "language": "en", "format": "json"}
+        response = requests.get(f"{cls.geo_api}/search", params=params)
+        if response.status_code != 200:
+            raise Exception(f"Failed to fetch geo location: {response.status_code}")
+        else:
+            data = response.json()
+            result = data["results"][0]
+            geo_location = {
+                "id": result["id"],
+                "name": result["name"],
+                "latitude": result["latitude"],
+                "longitude": result["longitude"],
+            }
+            return geo_location
+
+    @classmethod
+    def get_weather_information(cls, location: str) -> dict:
+        """Use this function to get the weather of any given location.
+        Note that the weather code should follow WMO Weather interpretation codes (WW):
+        0: Clear sky
+        1, 2, 3: Mainly clear, partly cloudy, and overcast
+        45, 48: Fog and depositing rime fog
+        51, 53, 55: Drizzle: Light, moderate, and dense intensity
+        56, 57: Freezing Drizzle: Light and dense intensity
+        61, 63, 65: Rain: Slight, moderate and heavy intensity
+        66, 67: Freezing Rain: Light and heavy intensity
+        71, 73, 75: Snow fall: Slight, moderate, and heavy intensity
+        77: Snow grains
+        80, 81, 82: Rain showers: Slight, moderate, and violent
+        85, 86: Snow showers slight and heavy
+        95: Thunderstorm: Slight or moderate
+        96, 99: Thunderstorm with slight and heavy hail
+        """
+        logger.info(
+            f"Calling open-meteo api to get weather information of location: {location}"
+        )
+        geo_location = cls._get_geo_location(location)
+        timezone = pytz.timezone("UTC").zone
+        params = {
+            "latitude": geo_location["latitude"],
+            "longitude": geo_location["longitude"],
+            "current": "temperature_2m,weather_code",
+            "hourly": "temperature_2m,weather_code",
+            "daily": "weather_code",
+            "timezone": timezone,
+        }
+        response = requests.get(f"{cls.weather_api}/forecast", params=params)
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to fetch weather information: {response.status_code}"
+            )
+        return response.json()
+
+
+def get_tools(**kwargs):
+    return [FunctionTool.from_defaults(OpenMeteoWeather.get_weather_information)]