Monday, 6 January 2020

How to detect with feedparser if there are new items in an RSS channel?

I have the following code. When you understand the code, you can look at the two comments with the capital letters. I could test if there are new items in the channel with insert or ignore but I'm trying the better mechanism with utilization feed.updated_parsed attribute. Why doesn't it work as expected?

def getItemsForChannel(xmlUrl, lastUpdate):   
  socket.setdefaulttimeout(60)
  feedparserDictionary = feedparser.parse(xmlUrl)
  updatedTime = datetime.fromtimestamp(mktime(feedparserDictionary.feed.updated_parsed))
  lst = datetime.strptime(lastUpdate, "%Y-%m-%dT%H:%M:%S.%f")
  if updatedTime < lst:
    return [] # HERE NOT BEHAVING CORRECTLY, WHEN I COMMENT THIS LINE, THERE MAY BE A FEW ITEMS

  items = feedparserDictionary.entries
  print "There are new %d items" % len(items)
  return items

def setChannelUpdateTime(xmlUrl, tm):
  con = sqlite.connect(connectionString)
  cur = con.cursor()
  cur.execute("update channels set updated = :tm where xmlurl = :xmlUrl", locals())
  con.commit()
  print "updated successfully"
  cur.close()
  con.close()

if __name__ == "_main__":
   ...
   ...
   for channel in channels:
     ...
     ...
     for item in items:
       ...
       ...
       cur.execute("insert or ignore into feeds \
                     (title, link, description, read, updated, channelid) \
                     values (?, ?, ?, ?, ?, ?)", \
                     (title, link, description, 0, updated, channelId))
       countOfNewItems += cur.rowcount # WHICH ARE INSERTED HERE
       con.commit()
       cur.close()

       if countOfNewItems:
         print "Found new items"
         now = datetime.now().isoformat()
         if "." not in now:
           now = now + ".000000"
         setChannelUpdateTime(channelXmlUrl, now)


from How to detect with feedparser if there are new items in an RSS channel?

No comments:

Post a Comment