from queue import Queue
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seed page and the URL prefix a discovered link must have to be crawled
# (keeps the crawler on-site instead of wandering the whole web).
START_URL = 'https://vm009.rz.uos.de/crawl/index.html'
BASE_URL = 'https://vm009.rz.uos.de/crawl'


def crawl(start_url: str = START_URL, base_url: str = BASE_URL) -> list[str]:
    """Breadth-first crawl starting at *start_url*, restricted to *base_url*.

    Fetches each queued page, extracts all anchor hrefs, resolves them
    against the page they appear on, and enqueues every not-yet-visited
    URL whose string starts with *base_url*.

    Args:
        start_url: First page to fetch; also counts as visited.
        base_url: URL prefix a link must start with to be followed.

    Returns:
        The visited URLs in discovery order (start_url first).
    """
    frontier: Queue[str] = Queue()
    visited = {start_url}   # set: O(1) membership test (was an O(n) list scan)
    order = [start_url]     # separate list preserves discovery order
    frontier.put(start_url)

    while not frontier.empty():
        page_url = frontier.get()
        try:
            response = requests.get(page_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Best-effort crawl: one dead/slow link should not abort the run.
            print(f'skipping {page_url}: {exc}')
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href is None:
                # <a> without href — subscripting would raise KeyError.
                continue
            # Resolve relative to the CURRENT page, not a fixed base URL —
            # otherwise relative links on pages in subdirectories resolve
            # to the wrong location.
            url = urljoin(page_url, href)
            # Prefix test, not substring: 'base in url' would also match a
            # URL that merely contains the base somewhere (e.g. in a query).
            if url not in visited and url.startswith(base_url):
                print(url)
                print(anchor.text)
                frontier.put(url)
                visited.add(url)
                order.append(url)

    return order


if __name__ == '__main__':
    print(crawl())