diff --git a/xkcd_alt.py b/xkcd_alt.py
index bec06d4d8b1f50f57e1a4fbdf7ad1e4ebd60b118..a85221f976f4ce89a7fd2f77be7d74a93b7f7bc2 100644
--- a/xkcd_alt.py
+++ b/xkcd_alt.py
@@ -31,13 +31,55 @@ def get_auth():
            os.environ.get('XKCD_ACCESS_SECRET_TOKEN', None)]
     for i in key:
         if i is None:  # Verify keys were loaded
-            print("OAuth initiation failed: Environmental variable not found")
-            auth = 'crash'
-    if key != 'crash':
-        auth = OAuth1(key[0], key[1], key[2], key[3])
-        print('OAuth initiation successful!')
-        del key
-        return auth
-    else:
-        del key
-        return auth
\ No newline at end of file
+            # `i` is None here, so `i+1` would raise TypeError; report the key's
+            # 1-based position in the list instead.
+            print("OAuth initiation failed: Environmental variable {} not found"
+                  .format(key.index(i) + 1))
+            del key
+            return 'crash'  # Enter log protection mode
+
+    auth = OAuth1(key[0], key[1], key[2], key[3])
+    print('OAuth initiation successful!')
+    del key
+    return auth
+
+
+def retrieve_text(site):
+    """Fetch the XKCD page at `site` and build the Tweet body from its title text.
+
+    The page is requested up to 11 times: each of the first 6 failures is followed by a
+    10 second wait, each of the next 4 by a 60 second wait, and the 11th failure gives up.
+
+    Returns the Tweet text on success, or the string 'crash' if the page could not be
+    retrieved or no image with a title attribute was found.
+    """
+    for attempt in range(11):
+        print('Accessing {} (attempt {} of 11)'.format(site, attempt+1))
+        html_raw = requests.get(site)  # Retrieving raw HTML data
+        if html_raw.status_code == 200:  # Data successfully retrieved
+            break  # Stop retrying instead of re-requesting the page on every pass
+
+        if attempt < 6:
+            print('Could not access XKCD ({}). '.format(html_raw.status_code) +
+                  'Trying again in 10 seconds...')
+            time.sleep(10)  # Make 6 attempts with 10 second delays
+        elif attempt < 10:
+            print('Could not access XKCD ({}). '.format(html_raw.status_code) +
+                  'Trying again in 60 seconds...')
+            time.sleep(60)  # Make 4 attempts with 60 seconds delays
+        else:
+            print('XKCD retrieval failed: could not access {}'.format(site))
+            return 'crash'  # Enter log protection mode
+
+    # BeautifulSoup parses markup text, not the requests Response object itself
+    html = BeautifulSoup(html_raw.text, 'html.parser')
+    comic = html.find('img', title=True)  # First image carrying a title attribute
+    if comic is None:
+        print('Title extraction failed: image not found')
+        return 'crash'  # Enter log protection mode
+
+    title = comic['title']  # Extracts the title text
+    tweet = 'Alt/title text: "{}"'.format(title)  # Construct the main Tweet body
+
+    print('Tweet constructed')
+    return tweet
\ No newline at end of file