|
#!/usr/bin/env python3
|
|
|
|
import requests
|
|
import logging
|
|
import xml.etree.ElementTree as ET
|
|
import json
|
|
import unidecode
|
|
import sqlite3
|
|
import common
|
|
import traceback
|
|
import sys
|
|
|
|
from bs4 import BeautifulSoup
|
|
from datetime import datetime
|
|
from kafka import KafkaProducer
|
|
|
|
|
|
def get_categories(url, timeout=30):
    """Fetch the category navigation XML and map each label to its URL.

    Args:
        url: Address of the XML document that lists the categories.
        timeout: Seconds to wait for the server before giving up. Passing
            ``None`` disables the timeout (the behaviour before this
            parameter existed).

    Returns:
        A dict mapping the text of every ``Label`` element to the text of
        its sibling ``Url`` element, one entry per
        ``./Data/CategoryNavigationViewModel`` node.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    page = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of trying to parse an error page as XML.
    page.raise_for_status()
    root = ET.fromstring(page.content)

    categories = {}
    for cat_elm in root.findall("./Data/CategoryNavigationViewModel"):
        label_elm = cat_elm.find("Label")
        url_elm = cat_elm.find("Url")
        label = label_elm.text if label_elm is not None else None
        cat_url = url_elm.text if url_elm is not None else None
        if not label or not cat_url:
            # An entry without both pieces cannot be crawled; skip it rather
            # than abort the whole run with an AttributeError.
            logging.warning("Skipping category entry without Label/Url in %s", url)
            continue
        categories[label] = cat_url

    return categories
|
|
|
|
|
|
def _fetch_json(url, cookies, timeout):
    """GET *url* and return its body parsed as JSON.

    The body is decoded as UTF-8 (undecodable bytes dropped) and passed
    through ``unidecode.unidecode`` before parsing, exactly as the scraper
    has always done.
    """
    response = requests.get(url, cookies=cookies, timeout=timeout)
    # Surface HTTP errors directly instead of as a confusing JSON/Key error.
    response.raise_for_status()
    text = response.content.decode('utf-8', 'ignore')
    return json.loads(unidecode.unidecode(text))


def _build_product(action, product_json):
    """Translate one API item into the ``(product, price)`` dict pair."""
    product = {}
    price = {}

    product['title'] = product_json['title']
    product['url'] = action['url'] + product_json['url']
    product['image_url'] = action['url'] + product_json['imageUrl']
    product['info'] = product_json['subTitle']
    product['sku_code'] = product_json['code']
    for spec in product_json['specifications']:
        if spec['id'] == 'attEANCodeVariant':
            product['ean_code'] = spec['value']
        if spec['id'] == 'attLongDescription':
            product['description'] = spec['value']
    product['brand'] = product_json['brandName']

    price['price'] = product_json['price']
    # Deliberately an equality test, not truthiness: any value that does not
    # compare equal to False (including None) is recorded as a promo.
    price['promo'] = 0 if product_json['isDeal'] == False else 1  # noqa: E712
    price['promo_start'] = product_json['dealStartDate']
    price['promo_end'] = product_json['dealEndDate']

    return product, price


def get_products(config, cat_url, kafka_producer, timeout=30):
    """Fetch every product of one category and hand each to ``common.add_product``.

    The products URL template from ``config['action']['products_url']`` has
    its ``__url__``, ``__offset__`` and ``__limit__`` placeholders filled in.
    A first request with limit 1 reads ``totalCount``; a second request with
    that count as the limit retrieves all items at once.

    Items whose ``type`` is not ``'product'`` are ignored. Any exception
    raised while converting or publishing a single item is recorded through
    ``common.dump_failed_product`` and does not stop the remaining items.

    Args:
        config: Scraper configuration; the ``'action'`` section is read.
        cat_url: Category path substituted for ``__url__`` in the template.
        kafka_producer: Producer object forwarded to ``common.add_product``.
        timeout: Seconds to wait for each HTTP request; ``None`` disables
            the timeout (the behaviour before this parameter existed).

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status for either request.
    """
    action = config['action']
    url = action['products_url'].replace("__url__", cat_url).replace("__offset__", "0")
    cookies = action['products_cookie']

    total_count = _fetch_json(url.replace("__limit__", "1"), cookies, timeout)['totalCount']
    products_json = _fetch_json(url.replace("__limit__", str(total_count)), cookies, timeout)

    for product_json in products_json['items']:
        try:
            if product_json['type'] != 'product':
                continue
            product, price = _build_product(action, product_json)
            common.add_product(kafka_producer, action['name'], product, price)
        except Exception as err:
            # Best effort per item: log the failure and keep going.
            common.dump_failed_product(action['name'], cat_url, product_json, err, traceback.format_exc())
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: register the store, crawl every subcategory of every
    # category, publish the products, then trigger the store price update.
    config = common.get_config()
    action = config['action']

    producer = KafkaProducer(bootstrap_servers=[config['config']['kafka_boostrap_servers']])
    try:
        common.insert_update_store(producer, {'store': action['name'], 'url': action['url'], 'image_url': action['logo']})
        common.clear_failed_product(action['name'])

        categories = get_categories(action['categories_url'])

        # Only the category paths are needed here, not their labels.
        for cat_path in categories.values():
            soup = common.get_soup_page_no_limit(f"{action['url']}{cat_path}")
            for link in soup.find_all("a", class_="subcategory-cta-list__cta"):
                href = link.get('href')
                if not href:
                    # An anchor without a target cannot be crawled; skipping
                    # it avoids a KeyError that would abort the whole run.
                    continue
                get_products(config, href, producer)

        common.update_store_prices(producer, action)

        # KafkaProducer.send() is asynchronous; block until everything that
        # was queued has actually been delivered before the process exits.
        # NOTE(review): harmless if the common.* helpers already flush.
        producer.flush()
    finally:
        # Always release the producer's network resources, even on failure.
        producer.close()
|