调整 Markdown 文件读取的格式内容
This commit is contained in:
@@ -27,17 +27,19 @@ class ChunkMarkdownReader(MarkdownReader):
|
|||||||
for line in lines:
|
for line in lines:
|
||||||
tokensNum += self._token_size(line)
|
tokensNum += self._token_size(line)
|
||||||
if tokensNum > self._chunkSize and len(current_lines) > 0:
|
if tokensNum > self._chunkSize and len(current_lines) > 0:
|
||||||
|
if len(markdown_tups) == 0:
|
||||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
||||||
|
else:
|
||||||
|
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||||
tokensNum = headerSize
|
tokensNum = headerSize
|
||||||
current_lines.clear()
|
current_lines.clear()
|
||||||
|
|
||||||
current_lines.append(line)
|
current_lines.append(line)
|
||||||
|
|
||||||
if line == '\n' or line == '\r':
|
if line == '\n' or line == '\r':
|
||||||
if tokensNum > self._chunkSize:
|
if tokensNum > self._chunkSize:
|
||||||
raise ValueError('标题Token数大于chunkSize大小')
|
raise ValueError('标题Token数大于chunkSize大小')
|
||||||
strTitle = "\n".join(current_lines)
|
strTitle = "\n".join(current_lines)
|
||||||
headerSize = headerSize + self._token_size(strTitle)
|
#headerSize = headerSize + self._token_size(strTitle)
|
||||||
current_lines.clear()
|
current_lines.clear()
|
||||||
|
|
||||||
if line.startswith("|---"):
|
if line.startswith("|---"):
|
||||||
@@ -46,7 +48,11 @@ class ChunkMarkdownReader(MarkdownReader):
|
|||||||
current_lines.clear()
|
current_lines.clear()
|
||||||
|
|
||||||
if len(current_lines) > 0:
|
if len(current_lines) > 0:
|
||||||
|
if len(markdown_tups) == 0:
|
||||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
||||||
|
else:
|
||||||
|
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||||
|
|
||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
key if key is None else re.sub(r"#", "", key).strip(),
|
key if key is None else re.sub(r"#", "", key).strip(),
|
||||||
|
|||||||
Reference in New Issue
Block a user