Files
nekretnine_parser/parsers/halooglasi.py
2022-10-20 10:28:21 +02:00

65 lines
2.5 KiB
Python

import json
from bs4 import BeautifulSoup
from dateutil.parser import parse
from .base import BaseParser, AnnoncementPreview, Announcement
class HalooglasiParser(BaseParser):
    """Parser for apartment-rental listings on halooglasi.com.

    Provides the two page-processing hooks: one that turns a search-results
    page into listing previews, and one that turns a single listing page into
    a full ``Announcement``.
    """

    BASE_LINK = "https://www.halooglasi.com"
    BASE_SEARCH_LINK = "https://www.halooglasi.com/nekretnine/izdavanje-stanova/novi-sad"
    # Search filters; presumably sent as query-string parameters by BaseParser.
    # NOTE(review): the unit / furnishing ids are site-defined codes — confirm
    # their meaning against the site's search form before changing them.
    BASE_PARAMS = {
        "cena_d_to": 1000,
        "cena_d_unit": 4,
        "kvadratura_d_from": 30,
        "kvadratura_d_unit": 1,
        "namestenost_id_l": "563,562"
    }
    PAGE_PARAM = "page"

    # Prefix of the inline-script assignment that carries the listing JSON.
    _CLASSIFIED_MARKER = "QuidditaEnvironment.CurrentClassified="

    @classmethod
    def process_previews_page(cls, bs: BeautifulSoup) -> list[AnnoncementPreview]:
        """Extract listing previews from one parsed search-results page.

        Args:
            bs: Parsed HTML of a search-results page.

        Returns:
            One ``AnnoncementPreview`` per listing card that has a title link
            and a publish date. Returns an empty list when the page has no
            product-list container.
        """
        product_list_el = bs.find("div", {"class": "row product-list"})
        if product_list_el is None:
            return []

        result: list[AnnoncementPreview] = []
        for product_el in product_list_el.find_all("div", {"class": "col-md-12 col-sm-12 col-xs-12 col-lg-12"}):
            publish_date_el = product_el.find("span", {"class": "publish-date"})
            title_el = product_el.find("h3", {"class": "product-title"})
            link_el = title_el.find("a") if title_el is not None else None
            # Skip cards that are not complete listings instead of aborting
            # the whole page on the first one.
            if publish_date_el is None or link_el is None:
                continue

            # The date is rendered with a trailing dot, which is removed here.
            # It is assumed to be day-first (DD.MM.YYYY.); without
            # dayfirst=True dateutil reads ambiguous dates month-first.
            date_text = publish_date_el.text.strip().rstrip(".")
            # NOTE: extraction of square/floor from the "product-features"
            # list is currently disabled.
            result.append(
                AnnoncementPreview(
                    title=title_el.text,
                    update_date=parse(date_text, dayfirst=True).date(),
                    link=cls.BASE_LINK + link_el.attrs["href"],
                )
            )
        return result

    @classmethod
    def process_annoncement_data(cls, bs: BeautifulSoup, preview: AnnoncementPreview) -> Announcement:
        """Build a full ``Announcement`` from a parsed listing page.

        Args:
            bs: Parsed HTML of a single listing page.
            preview: The preview this listing was reached from; only its
                ``link`` is reused.

        Returns:
            An ``Announcement`` filled from the JSON object embedded in the
            page's inline script.

        Raises:
            ValueError: If the embedded JSON cannot be found or decoded
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
            KeyError: If the JSON lacks one of the expected fields.
        """
        data = cls._extract_classified_data(bs)
        # NOTE: square/floor fields are currently not extracted.
        return Announcement(
            title=data["Title"],
            description=data["TextHtml"],
            price=data["OtherFields"]["cena_d"],
            update_date=parse(data["ValidFrom"]).date(),
            link=preview.link
        )

    @classmethod
    def _extract_classified_data(cls, bs: BeautifulSoup) -> dict:
        """Return the JSON object assigned to ``QuidditaEnvironment.CurrentClassified``."""
        marker = cls._CLASSIFIED_MARKER
        for script_el in bs.find_all("script"):
            script_text = script_el.text
            marker_pos = script_text.find(marker)
            if marker_pos == -1:
                continue
            json_start = marker_pos + len(marker)
            # raw_decode does not skip leading whitespace by itself.
            while json_start < len(script_text) and script_text[json_start].isspace():
                json_start += 1
            # raw_decode stops at the end of the first JSON value, so the
            # JavaScript that follows the object literal is ignored.
            data, _ = json.JSONDecoder().raw_decode(script_text, json_start)
            return data
        raise ValueError("Listing data script (QuidditaEnvironment.CurrentClassified) not found on page")