Description: A Python script that fetches an HTML web page, finds the links to MP3 files on it, and downloads them all.
Author: ChatGPT
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from concurrent.futures import ThreadPoolExecutor, as_completed
# URL of the webpage to download files from.
# The address must be a bare URL: wrapping it in "<...>" (a Markdown autolink
# artifact) leaves requests with nothing it can fetch.
url = "https://ia600209.us.archive.org/32/items/BasharPlanetaryMidwife_201603/"

# Create the directory that receives the downloaded files. exist_ok makes the
# call a no-op when the directory is already there, so no separate check is
# needed.
os.makedirs("mp3_files", exist_ok=True)

# Fetch the page. The timeout keeps an unresponsive server from hanging the
# script forever, and raise_for_status() stops on an HTTP error instead of
# parsing an error page and reporting that it contains no MP3 links.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Find all <a> tags that link to .mp3 files. The suffix is compared in lower
# case so links ending in ".MP3" are picked up as well.
mp3_links = soup.find_all(
    "a", href=lambda href: href and href.lower().endswith(".mp3")
)
def _progress_line(filename, downloaded, file_size, width=50):
    """Return one progress line for *filename*, prefixed with a carriage return.

    When *file_size* is not positive (the server sent no Content-Length) only
    the number of kilobytes received so far is shown, because a percentage
    bar cannot be computed.
    """
    done_kb = downloaded / 1024
    if file_size <= 0:
        return f"\rDownloading {filename}: {done_kb:.2f} KB"
    # Clamp the bar: the decoded body can be larger than the advertised
    # Content-Length (e.g. compressed transfer), which would overflow it.
    filled = min(width, int(downloaded / file_size * width))
    bar = "=" * filled + " " * (width - filled)
    return (
        f"\rDownloading {filename}: [{bar}] "
        f"{done_kb:.2f} KB / {file_size / 1024:.2f} KB"
    )


def download_file(link):
    """Download the file referenced by *link* into the ``mp3_files`` directory.

    Args:
        link: A parsed ``<a>`` element; its ``href`` is resolved against the
            module-level ``url``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written.

    A file that already exists under its final name is skipped. Data is
    streamed to a temporary ``.part`` file and renamed only after the whole
    body has arrived, so an interrupted download is never mistaken for a
    finished one on the next run.
    """
    filename = link["href"].split("/")[-1]
    file_url = urljoin(url, link["href"])
    file_path = os.path.join("mp3_files", filename)

    if os.path.exists(file_path):
        print(f"{filename} already exists")
        return

    part_path = file_path + ".part"
    block_size = 8192
    downloaded = 0
    try:
        with requests.get(
            file_url, stream=True, allow_redirects=True, timeout=30
        ) as r:
            # Do not save an HTML error page under an .mp3 name.
            r.raise_for_status()
            # 0 means "size unknown"; _progress_line handles that case
            # instead of dividing by zero.
            file_size = int(r.headers.get("content-length", 0))
            with open(part_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=block_size):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        print(
                            _progress_line(filename, downloaded, file_size),
                            end="",
                        )
        # Only a complete download gets the final name.
        os.replace(part_path, file_path)
    finally:
        # After a successful replace the temporary file is gone; on any
        # failure remove the partial data before the error propagates.
        if os.path.exists(part_path):
            os.remove(part_path)

    # Closing line: a full bar when the size was known, otherwise the total
    # number of kilobytes received.
    print(_progress_line(filename, file_size or downloaded, file_size))
    print(f"Downloaded {filename}")


if mp3_links:
    # Download up to 5 files concurrently
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(download_file, link) for link in mp3_links]
        for future in as_completed(futures):
            # result() re-raises any exception raised inside the worker.
            future.result()
else:
    print("No .mp3 files found on the webpage.")
To run the Python script on Windows OS, follow these steps:
The script requires the requests and beautifulsoup4 libraries, which you can install using pip. Open a command prompt and type the following command:
pip install requests beautifulsoup4
This will install both libraries.
Save the script as download_mp3.py.
Open a command prompt by pressing Windows Key + R on your keyboard, typing cmd in the Run dialog box, and then pressing Enter.
Use the cd command to navigate to the directory where the script is saved. For example, if you saved the script in the Downloads folder, type:
cd C:\Users\YourUserName\Downloads