调整MarkDown文件读取的格式内容
This commit is contained in:
@@ -27,17 +27,19 @@ class ChunkMarkdownReader(MarkdownReader):
|
||||
for line in lines:
|
||||
tokensNum += self._token_size(line)
|
||||
if tokensNum > self._chunkSize and len(current_lines) > 0:
|
||||
if len(markdown_tups) == 0:
|
||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
||||
else:
|
||||
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||
tokensNum = headerSize
|
||||
current_lines.clear()
|
||||
|
||||
current_lines.append(line)
|
||||
|
||||
if line == '\n' or line == '\r':
|
||||
if tokensNum > self._chunkSize:
|
||||
raise ValueError('标题Token数大于chunkSize大小')
|
||||
strTitle = "\n".join(current_lines)
|
||||
headerSize = headerSize + self._token_size(strTitle)
|
||||
#headerSize = headerSize + self._token_size(strTitle)
|
||||
current_lines.clear()
|
||||
|
||||
if line.startswith("|---"):
|
||||
@@ -46,7 +48,11 @@ class ChunkMarkdownReader(MarkdownReader):
|
||||
current_lines.clear()
|
||||
|
||||
if len(current_lines) > 0:
|
||||
if len(markdown_tups) == 0:
|
||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
||||
else:
|
||||
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||
|
||||
return [
|
||||
(
|
||||
key if key is None else re.sub(r"#", "", key).strip(),
|
||||
|
||||
Reference in New Issue
Block a user