diff --git a/backend/app/engine/loaders/markdownReader.py b/backend/app/engine/loaders/markdownReader.py index e5e465d..bf30a16 100644 --- a/backend/app/engine/loaders/markdownReader.py +++ b/backend/app/engine/loaders/markdownReader.py @@ -27,17 +27,19 @@ class ChunkMarkdownReader(MarkdownReader): for line in lines: tokensNum += self._token_size(line) if tokensNum > self._chunkSize and len(current_lines) > 0: - markdown_tups.append((strTitle + strheader , "\n".join(current_lines))) + if len(markdown_tups) == 0: + markdown_tups.append((strTitle + strheader , "\n".join(current_lines))) + else: + markdown_tups.append((strheader , "\n".join(current_lines))) tokensNum = headerSize current_lines.clear() - current_lines.append(line) if line == '\n' or line == '\r': if tokensNum > self._chunkSize: raise ValueError('标题Token数大于chunkSize大小') strTitle = "\n".join(current_lines) - headerSize = headerSize + self._token_size(strTitle) + #headerSize = headerSize + self._token_size(strTitle) current_lines.clear() if line.startswith("|---"): @@ -46,7 +48,11 @@ class ChunkMarkdownReader(MarkdownReader): current_lines.clear() if len(current_lines) > 0: - markdown_tups.append((strTitle + strheader , "\n".join(current_lines))) + if len(markdown_tups) == 0: + markdown_tups.append((strTitle + strheader , "\n".join(current_lines))) + else: + markdown_tups.append((strheader , "\n".join(current_lines))) + return [ ( key if key is None else re.sub(r"#", "", key).strip(),