Description: A Python script that fetches an HTML webpage, parses its links for MP3 files, and downloads all of them.

Author: ChatGPT

import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from concurrent.futures import ThreadPoolExecutor, as_completed

# URL of the webpage to download files from.
# Keep this a bare URL: wrapping characters such as "<" and ">" (often left
# over from copying a link out of formatted text) are not part of the address
# and prevent both the request and the later urljoin() calls from working.
url = "https://ia600209.us.archive.org/32/items/BasharPlanetaryMidwife_201603/"

# Create a directory to store downloaded files (no-op if it already exists)
os.makedirs("mp3_files", exist_ok=True)

# Send a request to the webpage and get its HTML content.
# The timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops us from parsing an HTTP error page as a listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Find all <a> tags that link to .mp3 files (extension matched
# case-insensitively so ".MP3" links are picked up as well)
mp3_links = soup.find_all(
    "a", href=lambda href: href and href.lower().endswith(".mp3")
)

if mp3_links:
    # Define a function to download a file and show progress
    def download_file(link):
        """Download the file referenced by *link* into the "mp3_files" directory.

        The file name is the last path segment of the link's ``href``; the
        download URL is that ``href`` resolved against the module-level ``url``.
        If the target file already exists, nothing is downloaded.

        Args:
            link: An object supporting ``link["href"]`` (for example, an ``<a>``
                tag returned by BeautifulSoup).

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        filename = link["href"].split("/")[-1]
        file_url = urljoin(url, link["href"])
        file_path = os.path.join("mp3_files", filename)

        if os.path.exists(file_path):
            print(f"{filename} already exists")
            return

        bar_width = 50
        block_size = 8192
        # Stream into a temporary ".part" file and rename it only once the
        # transfer has finished, so an interrupted download is never mistaken
        # for a complete file by the "already exists" check on the next run.
        temp_path = file_path + ".part"

        with requests.get(
            file_url, stream=True, allow_redirects=True, timeout=30
        ) as r:
            # Fail loudly instead of saving an HTTP error page as an .mp3.
            r.raise_for_status()
            # 0 means the server did not report a size (no Content-Length).
            file_size = int(r.headers.get("content-length", 0))
            downloaded = 0
            with open(temp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=block_size):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)
                    if file_size > 0:
                        # Clamp: the decoded body may exceed Content-Length.
                        progress = min(
                            bar_width, downloaded * bar_width // file_size
                        )
                        print(
                            f"\rDownloading {filename}: "
                            f"[{'=' * progress}{' ' * (bar_width - progress)}] "
                            f"{downloaded / 1024:.2f} KB / {file_size / 1024:.2f} KB",
                            end="",
                        )
                    else:
                        # Unknown total size: show only the running byte count
                        # (avoids dividing by zero).
                        print(
                            f"\rDownloading {filename}: {downloaded / 1024:.2f} KB",
                            end="",
                        )

        os.replace(temp_path, file_path)

        total = file_size if file_size > 0 else downloaded
        print(
            f"\rDownloading {filename}: [{'=' * bar_width}] "
            f"{total / 1024:.2f} KB / {total / 1024:.2f} KB"
        )
        print(f"Downloaded {filename}")

    # Fan the downloads out over a pool of at most 5 worker threads
    with ThreadPoolExecutor(max_workers=5) as executor:
        pending = [executor.submit(download_file, anchor) for anchor in mp3_links]
        # Consume the futures in completion order; result() re-raises any
        # exception that occurred inside the corresponding worker.
        for finished in as_completed(pending):
            finished.result()
else:
    print("No .mp3 files found on the webpage.")

To run the Python script on Windows OS, follow these steps:

  1. Install Python: If you don't already have Python installed on your system, you can download it from the official website at **https://www.python.org/downloads/**. Choose the latest version of Python 3.x, download and run the installer, and follow the prompts to install Python.
  2. Install Required Libraries: The script requires the requests and beautifulsoup4 libraries, which you can install using pip. Open a command prompt and type the following command:
pip install requests beautifulsoup4

This will install both libraries.

  3. Save the Script: Copy the script into a text editor, such as Notepad or Visual Studio Code, and save it with a .py extension. For example, you can save it as download_mp3.py.
  4. Open a Command Prompt: Open a command prompt by pressing the Windows Key + R on your keyboard, typing cmd in the Run dialog box, and then pressing Enter.
  5. Navigate to the Script Directory: Use the cd command to navigate to the directory where the script is saved. For example, if you saved the script in the Downloads folder, type:
cd C:\Users\YourUserName\Downloads
  6. Run the Script: Once you are in the directory where the script is saved, type the following command to run the script: