import os
from os import walk
import os.path
from markdownify import markdownify
from bs4 import BeautifulSoup
def wrap_links(html: str) -> str:
    """Stretch the link of each single-link list item over the item's full text.

    For every ``<li>`` that contains exactly one ``<a>`` (at any depth), the
    anchor's content is replaced with the whitespace-normalised text of the
    whole list item, and the list item is then emptied and given that anchor
    as its only child.  List items with no anchor, or with more than one
    anchor, are left exactly as they were.

    Args:
        html: HTML markup to process.

    Returns:
        The processed document serialised back to a string.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for li in soup.find_all('li'):
        anchors = li.find_all('a')
        # Only the single-link case is rewritten; items with zero or several
        # links are deliberately passed through unchanged.
        if len(anchors) != 1:
            continue
        anchor = anchors[0]
        # Read the item's text *before* mutating the tree: assigning to
        # ``.string`` and clearing the item both discard existing children.
        anchor.string = li.get_text(' ', strip=True)
        li.clear()
        li.append(anchor)
    return str(soup)
def convert_html_to_md(HtmlList):
    """Convert each HTML file in *HtmlList* to Markdown in ``output/markdown``.

    Every file is read, passed through ``wrap_links``, converted with
    ``markdownify`` (ATX-style ``#`` headings) and written to
    ``output/markdown/<name>.md``, where ``<name>`` is the source file's base
    name without its extension.  The output directory is created if it does
    not exist yet.

    Args:
        HtmlList: Iterable of paths to the HTML files to convert.

    Returns:
        The fixed status message ``"html was converted to markdown (1/3)"``.

    Raises:
        OSError: If a source file cannot be read or a target file cannot be
            written.
    """
    output_dir = os.path.join("output", "markdown")
    # Create the target directory up front so the first write cannot fail
    # merely because it is missing.
    os.makedirs(output_dir, exist_ok=True)

    for path in HtmlList:
        # Context managers guarantee both handles are closed even on error;
        # an explicit encoding keeps results independent of the OS locale.
        with open(path, "r", encoding="utf-8") as source:
            html = source.read()

        markdown = markdownify(wrap_links(html), heading_style="ATX")

        # Swap only the real extension for ".md" (a blanket str.replace would
        # also rewrite ".html" occurring in the middle of the file name and
        # would leave e.g. ".HTML" / ".htm" names without a ".md" suffix).
        stem, _ = os.path.splitext(os.path.basename(path))
        target = os.path.join(output_dir, stem + ".md")
        with open(target, "w", encoding="utf-8") as sink:
            sink.write(markdown)

    return "html was converted to markdown (1/3)"