Fix logic error when pubdate is not found

This commit is contained in:
2024-06-17 10:27:13 +02:00
parent c35c23f073
commit ac8c7251e8
3 changed files with 13 additions and 9 deletions

View File

@@ -1,5 +1,5 @@
from helpers import *
DEBUG = True
DEBUG = False
def log(*s):
if DEBUG:
@@ -9,10 +9,10 @@ def indeed_com(url,session):
if(session == 0):
with requests.Session() as session:
page = session.get(url)
print(page)
log(page)
else:
page = session.get(url)
print(page)
log(page)
soup = BeautifulSoup(page.content,"html.parser")
#print(soup.prettify())
@@ -109,7 +109,7 @@ def next_url_jobagent(base_url,session,c):#deprecated will be removed in the fut
for i2 in next_url_names:
striped_string = i2.text.strip()
log(i2.text.strip(),"stripped:",striped_string)
# print("Printable characters?",striped_string.isprintable())
log("Printable characters?",striped_string.isprintable())
if (striped_string) == "Nächste Seite":
log(i2)
next_url = i2.get("href")
@@ -137,8 +137,7 @@ def scrap_jobagent(url,entry,session):
results = soup.find_all("li",class_="item")
if not results:
print("no li items found")
print("page:",page)
input("Press key to continue")
log("page:",page)
title = item("span","jobtitle",0)
ar_title = finder(results,title)