diff options
Diffstat (limited to 'converters')
| -rw-r--r-- | converters/html_to_md.py | 21 |
1 files changed, 21 insertions, 0 deletions
# diff --git a/converters/html_to_md.py b/converters/html_to_md.py
# (index b6aeae0..e3109e1, mode 100644; hunk @@ -3,12 +3,33 @@)
# Reconstructed post-patch contents of the hunk; the real file may contain
# more code before and after this excerpt.
from os import walk
import os.path
from markdownify import markdownify
from bs4 import BeautifulSoup


def wrap_links(html: str) -> str:
    """Make the link of every single-link list item span the item's whole text.

    Parses *html* with BeautifulSoup's ``html.parser``. For each ``<li>``
    containing exactly one ``<a>``, the item's text (fragments joined by a
    single space, stripped) becomes the link text and the ``<a>`` becomes the
    item's only child. Items with no link or with several links are left
    untouched.

    Two kinds of single-link item are deliberately skipped because rewriting
    them would destroy content:

    * items holding a nested ``<ul>``/``<ol>`` -- collapsing them would
      flatten the whole sub-list into one link;
    * items with no text at all (e.g. a link wrapping only an image) --
      assigning an empty string would drop the link's children.

    Returns the serialized document (``str(soup)``).
    """
    soup = BeautifulSoup(html, 'html.parser')
    for li in soup.find_all('li'):
        # Leave items that own a sub-list alone so nested lists survive.
        if li.find(['ul', 'ol']) is not None:
            continue
        links = li.find_all('a')
        if len(links) != 1:
            continue
        full_text = li.get_text(' ', strip=True)
        if not full_text:
            continue
        link = links[0]
        # Setting .string replaces all of the link's children with the text.
        link.string = full_text
        # clear() removes all of the item's children; append() then moves the
        # link from wherever it currently sits, leaving it as the sole child.
        li.clear()
        li.append(link)
    return str(soup)


def convert_html_to_md(HtmlList):
    # For each path: read the HTML, run it through wrap_links, convert it with
    # markdownify (ATX headings) and write it to output/markdown/<name>.md.
    # NOTE(review): the diff hunk ends on the f.write() line, so this function
    # may continue in the real file (e.g. closing `f`) -- confirm before
    # restructuring. As shown, neither file handle is explicitly closed.
    for path in HtmlList:
        pathsplit = path.split("/")
        file = open(str(path), "r").read()
        file = wrap_links(file)
        html = markdownify(file, heading_style="ATX")
        f = open("output/markdown/" + str(pathsplit[-1]).replace(".html", ".md"), "w")
        f.write(html)
