diff options
| author | uvok | 2025-06-29 14:46:48 +0200 |
|---|---|---|
| committer | uvok | 2025-06-29 14:46:48 +0200 |
| commit | 162c6e4b14f2ed52beb80b103cae448bbfc4a903 (patch) | |
| tree | 220c70310dda862517f4a9b2a506d01941d914ad | |
| parent | 9a70dda521b2ea481200eefc80e4755ee5279927 (diff) | |
Wrap links differently
| -rw-r--r-- | converters/html_to_md.py | 21 |
1 files changed, 21 insertions, 0 deletions
```diff
diff --git a/converters/html_to_md.py b/converters/html_to_md.py
index b6aeae0..e3109e1 100644
--- a/converters/html_to_md.py
+++ b/converters/html_to_md.py
@@ -3,12 +3,33 @@ from os import walk
 import os.path
 from markdownify import markdownify
+from bs4 import BeautifulSoup
+
+def wrap_links(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    for li in soup.find_all('li'):
+        a_tags = li.find_all('a')
+        if len(a_tags) == 1:
+            a = a_tags[0]
+            full_text = li.get_text(' ', strip=True)
+            a.string = full_text
+            li.clear()
+            li.append(a)
+        elif len(a_tags) > 1:
+# new_tags_text = [f"<li>{line}</li>" for line in li.decode_contents().split('\n')]
+# new_tags = BeautifulSoup(''.join(new_tags_text), 'html.parser')
+# li.replace_with(new_tags)
+            pass
+    return str(soup)
+
+
 def convert_html_to_md(HtmlList):
     for path in HtmlList:
         pathsplit = path.split("/")
         file = open(str(path), "r").read()
+        file = wrap_links(file)
         html = markdownify(file, heading_style="ATX")
         f = open("output/markdown/" + str(pathsplit[-1]).replace(".html", ".md"), "w")
         f.write(html)
```
