diff --git a/lib/helpers.py b/lib/helpers.py index 3e39ff0..80d28cc 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -59,6 +59,7 @@ class job(): return "%s| %s|%s|%s|%s|%s|%s" % (self.title,self.profession,self.company,self.location,self.date,self.description,self.link) def finder(results,item,**modes): + GETCHILDREN = modes.get("GETCHILDREN",'') ATTRS = modes.get('ATTRS',0) LOCATION_CLEANUP = modes.get('LOCATION_CLEANUP',0) LINK = modes.get('LINK',0) @@ -71,7 +72,6 @@ def finder(results,item,**modes): log("Item tag: ",item.tag) log("Modes:",modes) log("tag_content: ",item.tag_content) - for entry in results: if ATTRS==1: result = entry.findAll(item.tag,attrs=item.tag_content) @@ -79,7 +79,7 @@ def finder(results,item,**modes): else: result = entry.findAll(item.tag,class_=item.tag_content) log("found count results:",len(result)) - if item.name == "TITLE!!" and len(result) == 0 and DEBUG == True: + if len(result) == 0 and DEBUG == True: for x in results: log(x) input() @@ -91,7 +91,19 @@ def finder(results,item,**modes): # input("Press Enter..") i=(len(result)-1) result2 = result[i] - if LOCATION_CLEANUP==1: + if GETCHILDREN!='': + found = False + for results in result: + child = results.find(GETCHILDREN) + log(child) + if child != None and found == False: + log("CHILD: ",child.text.strip()) + found = True + content.append(child.text.strip()) + if found == False: + content.append("CHILD_NOT_FOUND: " + GETCHILDREN) + + elif LOCATION_CLEANUP==1: location = CleanLocation(result2.text.strip()) content.append(location) elif LINK==1: diff --git a/lib/scrap_jobs.py b/lib/scrap_jobs.py index b838b4e..b962e1f 100644 --- a/lib/scrap_jobs.py +++ b/lib/scrap_jobs.py @@ -48,10 +48,9 @@ def scrap_jobs(url,entry,session): location_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn" location = item("p",location_class,0) ar_location = finder(results,location,LOCATION_CLEANUP=1) - company_class = "P-sc-hyu5hk-0 Text__p2-sc-1lu7urs-10 Span-sc-1ybanni-0 Text__span-sc-1lu7urs-12 Text-sc-1lu7urs-13 jZCxUn" - company = item("p",company_class,3) - ar_company = finder(results,company,DEFAULT=1) + company = item("p",company_class,0) + ar_company = finder(results,company,DEFAULT=1,GETCHILDREN='strong') title = item("span","jlFpCz",0,"TITLE!!") ar_title = finder(results,title,DEFAULT=1)