summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--converters/html_to_md.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/converters/html_to_md.py b/converters/html_to_md.py
index b6aeae0..e3109e1 100644
--- a/converters/html_to_md.py
+++ b/converters/html_to_md.py
@@ -3,12 +3,33 @@ from os import walk
import os.path
from markdownify import markdownify
+from bs4 import BeautifulSoup
+
def wrap_links(html):
    """Make the link of each single-link list item span the item's whole text.

    The markup is parsed with BeautifulSoup's ``html.parser``.  For every
    ``<li>`` that contains exactly one ``<a>`` (at any depth), the anchor's
    content is replaced by the complete text of the list item (text
    fragments stripped and joined with a single space), the list item is
    emptied, and the anchor is put back as its only child.

    List items with no link, or with more than one link, are left as they
    are.

    Args:
        html: HTML markup to process.

    Returns:
        The processed document serialized back to a string.
    """
    document = BeautifulSoup(html, 'html.parser')
    for item in document.find_all('li'):
        anchors = item.find_all('a')
        if len(anchors) != 1:
            # Nothing to do without a link.  Items holding several links are
            # deliberately untouched as well: an earlier experiment that split
            # such an item line by line into separate <li> elements is
            # disabled.
            continue
        (anchor,) = anchors
        # Collect the text before clearing the item; the anchor then carries
        # all of it.
        anchor.string = item.get_text(' ', strip=True)
        item.clear()
        item.append(anchor)
    return str(document)
+
+
def convert_html_to_md(HtmlList):
for path in HtmlList:
pathsplit = path.split("/")
file = open(str(path), "r").read()
+ file = wrap_links(file)
html = markdownify(file, heading_style="ATX")
f = open("output/markdown/" + str(pathsplit[-1]).replace(".html", ".md"), "w")
f.write(html)