From 4ede40c37c0c8e098a8c66923f2eaea5517a8861 Mon Sep 17 00:00:00 2001 From: ccppi Date: Thu, 18 Jul 2024 11:26:13 +0200 Subject: [PATCH] - change search classes - change some to attributes - implement better debugging solution in finder() --- lib/conf | 6 ++++++ lib/helpers.py | 19 +++++++++++++++---- lib/scrap_jobs.py | 10 +++++----- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/conf b/lib/conf index f3928a3..0c824cd 100644 --- a/lib/conf +++ b/lib/conf @@ -1,3 +1,9 @@ +[jobs.ch_seilbahn] +USER = NONE +PW = NONE +LOGINURL = NONE +SCRAPURL = https://www.jobs.ch/en/vacancies/?term=seilbahn +TAG = Seilbahn [jobagent.ch] USER = j.wyss@kolabnow.ch diff --git a/lib/helpers.py b/lib/helpers.py index 7e32428..3e39ff0 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -5,7 +5,7 @@ from enum import Enum import re from dateconverter import * from datetime import datetime -DEBUG = False +DEBUG = True def log(*s): if DEBUG: @@ -35,10 +35,14 @@ months = [ ('November','11'), ('December','12')] class item(): - def __init__(self,tag,tag_content,index): + def __init__(self,tag,tag_content,index,name=None): self.tag = tag self.tag_content = tag_content self.index = index + if name is not None: + self.name = name + else: + self.name = "not defined" class job(): def __init__(self,title,profession,company,location,date,description,link,tag,starred): @@ -63,15 +67,22 @@ def finder(results,item,**modes): BASEURL = modes.get('BASEURL','') content = [] i = item.index + log("name",item.name) + log("Item tag: ",item.tag) log("Modes:",modes) - + log("tag_content: ",item.tag_content) + for entry in results: if ATTRS==1: result = entry.findAll(item.tag,attrs=item.tag_content) log(item.tag_content) else: result = entry.findAll(item.tag,class_=item.tag_content) - log("found count count results:",len(result)) + log("found count results:",len(result)) + if item.name == "TITLE!!" 
and len(result) == 0 and DEBUG == True: + for x in results: + log(x) + input() if result: log("theres a result") if i>(len(result)-1): diff --git a/lib/scrap_jobs.py b/lib/scrap_jobs.py index d99474a..b838b4e 100644 --- a/lib/scrap_jobs.py +++ b/lib/scrap_jobs.py @@ -1,5 +1,5 @@ from helpers import * -DEBUG = False +DEBUG = True def log(*s): if DEBUG: @@ -43,7 +43,7 @@ def scrap_jobs(url,entry,session): soup = BeautifulSoup(page.content,"html.parser") #print(soup.prettify()) - results = soup.find_all("div",attrs={"data-feat":"searched_jobs"}) + results = soup.find_all("div",attrs={'data-feat':'searched_jobs'}) location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn" location = item("p",location_class,0) @@ -53,14 +53,14 @@ def scrap_jobs(url,entry,session): company = item("p",company_class,3) ar_company = finder(results,company,DEFAULT=1) - title = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 VacancyItem___StyledText2-sc-iugtv6-5 iaJYDR jlFpCz dMwMcR",0) + title = item("span","jlFpCz",0,"TITLE!!") ar_title = finder(results,title,DEFAULT=1) date = item("span","Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 krGudM hUhFmL",0) ar_date = finder(results,date,CLEANDATE=1) - link = item("a","VacancyLink___StyledLink-sc-ufp08j-0",0) - ar_link = finder(results,link,LINK=1,BASEURL="https://jobs.ch") + link = item("a",{'data-cy' :'job-link'},0) + ar_link = finder(results,link,LINK=1,ATTRS=1,BASEURL="https://jobs.ch") tag = entry.tag#get from config return arrayToClass(ar_title,ar_company,ar_location,ar_date,ar_link,tag)