初始化提交

This commit is contained in:
2024-08-13 09:37:23 +08:00
parent 4923337038
commit e112fa4e44
50 changed files with 1649 additions and 259 deletions
+2 -1
View File
@@ -11,7 +11,7 @@ class CrawlUrl(BaseModel):
class WebLoaderConfig(BaseModel):
driver_arguments: list[str] = Field(default=None)
-urls: list[CrawlUrl]
+urls: list[CrawlUrl] = []
def get_web_documents(config: WebLoaderConfig):
@@ -25,6 +25,7 @@ def get_web_documents(config: WebLoaderConfig):
options.add_argument(arg)
docs = []
+urls = config.urls or []
for url in config.urls:
scraper = WholeSiteReader(
prefix=url.prefix,