# Reconstructed from a whitespace-collapsed copy of a git patch (cgit v1.2.3).
# Original patch metadata, kept for reference:
#
#   From 162c6e4b14f2ed52beb80b103cae448bbfc4a903 Mon Sep 17 00:00:00 2001
#   From: uvok
#   Date: Sun, 29 Jun 2025 14:46:48 +0200
#   Subject: Wrap links differently
#
#   converters/html_to_md.py | 21 +++++++++++++++++++++
#   1 file changed, 21 insertions(+)
#   (limited to 'converters')
#
#   diff --git a/converters/html_to_md.py b/converters/html_to_md.py
#   index b6aeae0..e3109e1 100644
#   @@ -3,12 +3,33 @@
#
# NOTE(review): the hunk starts at line 3 of converters/html_to_md.py and ends
# after the last statement shown below, so lines before the first import and
# after the final write are not visible here.

from os import walk
import os.path

from markdownify import markdownify
from bs4 import BeautifulSoup


def wrap_links(html: str) -> str:
    """Make the single link of a list item cover the item's whole text.

    Every ``<li>`` that contains exactly one ``<a>`` is rewritten so that the
    anchor's content becomes the full text of the item (text fragments are
    stripped and joined with single spaces) and the anchor is the item's only
    child. The anchor keeps its attributes.

    List items are left untouched when they

    * contain no link or more than one link,
    * contain a nested ``<ul>``/``<ol>`` (collapsing the item would fold the
      sub-list into the link text and then discard it), or
    * have no text at all (assigning an empty string to the anchor would wipe
      non-text children such as an image).

    Args:
        html: HTML markup to process.

    Returns:
        The processed document serialised back to a string.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for li in soup.find_all('li'):
        a_tags = li.find_all('a')
        if len(a_tags) != 1:
            # Items with several links stay as they are. The patch carried a
            # commented-out attempt to split such items into one list item per
            # line of their inner HTML; its markup was garbled in this copy,
            # so it is described here rather than reproduced.
            continue
        if li.find(['ul', 'ol']) is not None:
            # Keep nested lists intact instead of flattening them into a link.
            continue
        full_text = li.get_text(' ', strip=True)
        if not full_text:
            # Nothing to wrap; do not clobber e.g. an image-only link.
            continue
        a = a_tags[0]
        a.string = full_text
        # Drop everything from the item, then put the rewritten anchor back.
        li.clear()
        li.append(a)
    return str(soup)


def convert_html_to_md(HtmlList):
    """Convert each HTML file in *HtmlList* to Markdown under output/markdown/.

    For every path the file is read, passed through ``wrap_links`` and
    ``markdownify`` (ATX headings), and written to ``output/markdown/`` under
    the last ``/``-separated path component with ``.html`` replaced by ``.md``.
    """
    # NOTE(review): neither file handle is closed in the lines visible in this
    # patch; the hunk ends after the write, so confirm what follows in the
    # real file before changing the resource handling.
    for path in HtmlList:
        pathsplit = path.split("/")
        file = open(str(path), "r").read()
        file = wrap_links(file)
        html = markdownify(file, heading_style="ATX")
        f = open("output/markdown/" + str(pathsplit[-1]).replace(".html", ".md"), "w")
        f.write(html)