You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

81 lines
2.3 KiB

"""
Crawl the Poetry Foundation's Poem of the Day page at
https://www.poetryfoundation.org/poems/poem-of-the-day and put the poem link
into an RSS file, so that I can read it in my RSS reader.
"""
  6. from datetime import date, datetime, timedelta
  7. import requests
  8. from bs4 import BeautifulSoup
  9. import PyRSS2Gen
  10. def main():
  11. """
  12. Takes the Poetry foundation daily poem link, gets an updated link if a new poem is posted and then creates an rss feed file.
  13. """
  14. poempage = "https://www.poetryfoundation.org/poems/poem-of-the-day"
  15. reply = get_page_from_pf(poempage)
  16. if reply:
  17. create_rss_feed(reply)
  18. def get_page_from_pf(link):
  19. """
  20. Parses the PF daily poem link page. If it is updated, then it goes and fetches the link to the actual poem page and returns it
  21. :param link: url
  22. :type link: string
  23. :return: url
  24. :rtype: string
  25. """
  26. rawhtml = requests.get(link)
  27. soup = BeautifulSoup(rawhtml.text, "html.parser")
  28. look_full_poem_url = soup.find_all("a", string="Read More")
  29. get_first_element_from_poem_url = look_full_poem_url[0]
  30. full_poem_url = get_first_element_from_poem_url.attrs["href"]
  31. date_of_poem_str = soup.find("meta", {"name": "dcterms.Date"}).get("content")
  32. date_of_poem = datetime.strptime(date_of_poem_str, "%Y-%m-%d")
  33. date_of_poem = date_of_poem.date()
  34. today_date = date.today()
  35. if date_of_poem == today_date:
  36. return full_poem_url
  37. elif date_of_poem == today_date - timedelta(days=1):
  38. return full_poem_url
  39. else:
  40. return None
  41. def create_rss_feed(poemlink):
  42. """
  43. Takes the link that we fetched and then writes it to an xml file for a feed reader to fetch
  44. :param poemlink: url
  45. :type poemlink: string
  46. """
  47. rss = PyRSS2Gen.RSS2(
  48. title="Jason's PF feed",
  49. link=poemlink,
  50. description="Poem of the day",
  51. lastBuildDate=datetime.now(),
  52. items=[
  53. PyRSS2Gen.RSSItem(
  54. title=f"Poem for {date.today()}",
  55. link=poemlink,
  56. guid=PyRSS2Gen.Guid(f"Poem for {date.today()}"),
  57. pubDate=datetime.now(),
  58. ),
  59. ],
  60. )
  61. rss.write_xml(open("poem.xml", "w"))
  62. # create and write_to_rss_feed
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()