Skip to content

Commit

Permalink
added examples setting a timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
andreburgaud committed Sep 17, 2024
1 parent c189226 commit 0eaa230
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions examples/robots_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,37 @@
# Expect the site root to be fetchable by any agent ("*"), while /wp-admin/
# is rejected both as an absolute URL and as a path-only URL.
assert parser.can_fetch("*", "http://www.musi-cal.com/")
assert not parser.can_fetch("*", "http://www.musi-cal.com/wp-admin/")
assert not parser.can_fetch("*", "/wp-admin/")

# Example with a custom timeout: the second positional argument of
# from_uri is the timeout value (2 here).
parser = robots.RobotsParser.from_uri("https://robotspy.org/robots.txt", 2)

if parser.errors:
    print("ERRORS:")
    print(parser.errors)

if parser.warnings:
    print("WARNINGS:")
    # Print the warnings themselves (previously this printed parser.errors).
    print(parser.warnings)

# Both a named agent and the wildcard agent are expected to be allowed
# to fetch the site root.
assert parser.can_fetch("Googlebot", "https://robotspy.org/")
assert parser.can_fetch("*", "https://robotspy.org/")

# Setting the timeout to 0 should result in an error being recorded
# on the parser.
parser = robots.RobotsParser.from_uri("https://robotspy.org/robots.txt", 0)
assert parser.errors
if parser.errors:
    # One call, same output: the header line followed by the errors line.
    print("ERRORS:", parser.errors, sep="\n")


# Timeout error: fetch robots.txt from port 555 with a timeout of 2.
parser = robots.RobotsParser.from_uri("https://robotspy.org:555/robots.txt", 2)

# NOTE: the elapsed time may exceed the timeout value, because the
# urllib.request.urlopen timeout does not equate to a total timeout.
assert parser.errors
if parser.errors:
    # One call, same output: the header line followed by the errors line.
    print("ERRORS:", parser.errors, sep="\n")

0 comments on commit 0eaa230

Please sign in to comment.