THM html script

import argparse
from bs4 import BeautifulSoup
 
 
def _stripped_text(node):
    """Return the whitespace-stripped text of *node*, or "" if it is None."""
    return node.text.strip() if node is not None else ""


def _text_after(node):
    """Return the stripped text of the node that follows *node*, or ""."""
    if node is None or node.next_sibling is None:
        return ""
    sibling = node.next_sibling
    # Text nodes are str subclasses; anything else is an element whose
    # text has to be extracted first.
    if isinstance(sibling, str):
        return sibling.strip()
    return sibling.get_text().strip()


def parse_html(html):
    """Render the task cards found in *html* as Markdown.

    Each ``div.card`` that contains a ``div.task-header`` produces a
    ``## <name>`` heading, an "Information" section with the task
    description (the literal text "Start Machine" removed), any tables in
    the card as raw HTML, and a "Questions" section with one collapsed
    answer callout per question (plus its hint when one is present).

    Args:
        html: HTML markup to parse (a str or bytes accepted by
            BeautifulSoup).

    Returns:
        The generated Markdown as a single string; empty when no task
        card is found.
    """
    soup = BeautifulSoup(html, 'html.parser')

    parts = []
    for task in soup.find_all('div', class_='card'):
        task_header = task.find('div', class_='task-header')
        if task_header is None:
            # A card without a task header cannot be rendered as a task;
            # skip it instead of failing on None.
            continue

        # The task name is the text node right after the title span.
        task_name = _text_after(
            task_header.find('span', class_='task-dropdown-title')
        )

        task_desc = _stripped_text(task.find('div', class_='room-task-desc'))
        # Exclude the "Start Machine" button label from the description.
        task_desc = task_desc.replace('Start Machine', '')

        parts.append(f"## {task_name}\n---\n### Information\n{task_desc}\n")

        # Tables are passed through as raw HTML.
        parts.extend(str(table) + '\n\n' for table in task.find_all('table'))

        parts.append("### Questions\n")

        for question in task.find_all('div', class_='room-task-questions'):
            details = question.find('div', class_='room-task-question-details')
            if details is None:
                # No question text to render for this entry.
                continue
            parts.append(f"\n*{details.text.strip()}*\n\n")

            # Include the hint when the question has one.
            hint_button = question.find('button', class_='task-hint')
            if hint_button is not None:
                hint_box = hint_button.find_next_sibling('div')
                if hint_box is not None:
                    parts.append(f"Hint: {hint_box.text.strip()}\n\n")

            parts.append(">[!question]- **Answer**\n>\n")

    return "".join(parts)
 
 
def write_output_file(output, filename):
    """Write *output* to *filename* as UTF-8 text, replacing any existing file.

    Args:
        output: Text to write.
        filename: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII characters present in the text.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(output)
 
def main():
    """Command-line entry point: convert an HTML file to a Markdown file.

    Reads the file given with ``-f/--file``, converts it with
    ``parse_html`` and writes the result next to the input, with the
    input's extension replaced by ``.md``. Exits with a non-zero status
    when no file is given.
    """
    import os
    import sys

    parser = argparse.ArgumentParser(description="Parse HTML and generate output file.")
    parser.add_argument('-f', '--file', help="HTML file to parse.")
    args = parser.parse_args()

    if not args.file:
        print("Please provide an HTML file.")
        sys.exit(1)

    # Read raw bytes so the HTML parser can detect the document's encoding
    # instead of relying on the platform default.
    with open(args.file, 'rb') as f:
        html = f.read()

    output = parse_html(html)
    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. "./room.html") do not truncate the output name.
    output_file = os.path.splitext(args.file)[0] + ".md"
    write_output_file(output, output_file)
    print(f"Output file '{output_file}' generated successfully.")
 
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
🍵 Christians Pub 🍺

Explorer

THM html script

Graph View

Backlinks