from queue import Queue
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Breadth-first crawl starting at the index page, restricted to pages under base_site.
queue = Queue()
visited_links = []

start_site = 'https://vm009.rz.uos.de/crawl/index.html'
base_site = 'https://vm009.rz.uos.de/crawl/'
queue.put(start_site)
visited_links.append(start_site)

while not queue.empty():
    link = queue.get()
    r = requests.get(link)
    soup = BeautifulSoup(r.content, 'html.parser')

    # Follow every anchor that actually has an href attribute.
    for anchor in soup.find_all('a', href=True):
        # Resolve relative hrefs against the page they were found on.
        url = urljoin(link, anchor['href'])
        # Only enqueue links inside base_site that we have not visited yet.
        if url not in visited_links and url.startswith(base_site):
            print(url)
            print(anchor.text)
            queue.put(url)
            visited_links.append(url)

print(visited_links)