Skip to content

Identify Broken Links

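You can identify broken links using the find_broken_urls() function. It sends a GET request to each URL you supply and reports the URLs that raise an error (such as a timeout) or respond with a status code other than 200.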

find_broken_urls

Find broken URLs.

Parameters:

Name Type Description Default
urls list

A list of URLs to check.

required
timeout int

The timeout in seconds. Defaults to 5.

5

Returns:

Name Type Description
list list

A list of broken URLs.

Example
from seotools import find_broken_urls

urls_to_check = [
    "https://jamesg.blog/",
    "https://jamesg.blog/test/",
]

# find_broken_urls() takes the whole list of URLs and returns the subset
# that could not be fetched or did not respond with a 200 status code.
broken_urls = find_broken_urls(urls_to_check)
print("Broken URLs identified:")
print(broken_urls)
Source code in seotools/links/broken.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def find_broken_urls(urls: list, timeout: int = 5) -> list:
    """
    Find broken URLs.

    Each URL is fetched with a GET request on a pool of up to 10 worker
    threads. A URL is reported as broken when the request raises an
    exception (the exception is printed) or when the response status code
    is not 200.

    Args:
        urls (list): A list of URLs to check. A single URL string is also
            accepted and is treated as a one-item list.
        timeout (int, optional): The timeout in seconds. Defaults to 5.

    Returns:
        list: A list of broken URLs, in the same order as they appear in
            ``urls``.

    Example:
        ```python
        from seotools import find_broken_urls

        urls_to_check = [
            "https://jamesg.blog/",
            "https://jamesg.blog/test/",
        ]

        broken_urls = find_broken_urls(urls_to_check)
        print("Broken URLs identified:")
        print(broken_urls)
        ```
    """
    # A bare string would otherwise be iterated character by character and
    # every character would be requested (and reported) as its own "URL".
    if isinstance(urls, str):
        urls = [urls]

    broken_urls = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Keep (future, url) pairs in submission order so the returned list
        # follows the input order instead of request completion order.
        submitted = [
            (executor.submit(requests.get, url, timeout=timeout), url)
            for url in urls
        ]

        for future, url in submitted:
            try:
                response = future.result()
            except Exception as exc:
                # Best-effort check: any failure to fetch counts as broken.
                broken_urls.append(url)
                print(f"{url} generated an exception: {exc}")
            else:
                if response.status_code != 200:
                    broken_urls.append(url)

    return broken_urls