add link validation checker, which removes invalid entries

This commit is contained in:
2024-07-22 11:50:52 +02:00
parent 0870255544
commit dd60c722d8
2 changed files with 31 additions and 1 deletions

View File

@@ -1,6 +1,7 @@
import sqlite3
import mmh3
import sys
import requests
# Module-wide debug switch; presumably consulted by log() below to enable
# verbose output -- TODO confirm against the body of log().
DEBUG = False
def log(*s):
@@ -91,3 +92,30 @@ def writedb(jobs):
else:
print("NEW_ENTRY")
cursor.execute("INSERT INTO jobs (star,tag,title,company,location,link,pubdate,hash) VALUES (?,?,?,?,?,?,?,?)",(job.starred,job.tag,job.title,job.company,job.location,job.link,job.date,hash1))
def _link_is_dead(session, link, timeout):
    """Return True when *link* cannot be fetched and its row should be removed."""
    try:
        page = session.get(link, timeout=timeout)
    except requests.Timeout:
        # A slow or temporarily unreachable server says nothing about the
        # validity of the link, so keep the entry rather than lose data.
        print("link timed out, keep item")
        return False
    except requests.RequestException:
        # Malformed URL, unknown scheme, DNS failure, refused connection, ...
        print("link is no valid URL so remove item")
        return True
    if not page.ok:
        print("link is no more valid, remove item")
        return True
    return False


def isStillValid(file, *, timeout=10):
    """Delete every row of the ``jobs`` table whose link is no longer reachable.

    Each distinct ``link`` value stored in the SQLite database at *file* is
    requested with HTTP GET. A link is considered dead when the request
    fails (for example because the value is not a valid URL) or when the
    response carries an error status (``response.ok`` is false). All rows
    with a dead link are deleted in a single transaction. Links whose
    request only times out are kept.

    Args:
        file: Path of the SQLite database that contains the ``jobs`` table.
        timeout: Seconds to wait for each HTTP request before giving up on
            it; without a timeout a single unresponsive host would block
            the whole run.
    """
    connection = sqlite3.connect(file)
    try:
        # Read all links before deleting anything: modifying a table while
        # a SELECT cursor on it is still being stepped is not reliable.
        # dict.fromkeys() drops duplicates but keeps the original order, so
        # a link shared by several rows is only requested once.
        links = list(dict.fromkeys(
            row[0] for row in connection.execute("SELECT link from jobs;")
        ))
        if not links:
            return

        dead_links = []
        # One session for all requests lets connections be reused.
        with requests.Session() as session:
            for link in links:
                print("row: ", link)
                # The verdict is computed per link, so one failing link can
                # never cause unrelated rows to be removed.
                if _link_is_dead(session, link, timeout):
                    dead_links.append(link)

        # "with connection" commits on success and rolls back on error.
        with connection:
            for link in dead_links:
                removed = connection.execute(
                    "DELETE from jobs WHERE link = ?;", (link,)
                )
                print("Deletion resultet in: ", removed.rowcount)
    finally:
        # The sqlite3 context manager does not close the connection.
        connection.close()