|
|
- #!/usr/bin/env python3
-
- import requests
- import logging
- import xml.etree.ElementTree as ET
- import json
- import unidecode
- import sqlite3
- import common
- import traceback
- import sys
-
- from bs4 import BeautifulSoup
- from datetime import datetime
- from kafka import KafkaProducer
-
-
def get_categories(url):
    """Fetch the category navigation XML and map category labels to URLs.

    Args:
        url: Address of the XML document that lists the store's categories.

    Returns:
        dict: Category label text -> category URL text, one entry for every
        ``./Data/CategoryNavigationViewModel`` element that has non-empty
        ``Label`` and ``Url`` children. If a label occurs more than once the
        last occurrence wins.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    page = requests.get(url)
    # Surface the HTTP status instead of a confusing XML parse error when the
    # server returns an error page.
    page.raise_for_status()
    root = ET.fromstring(page.content)

    categories = {}
    for cat_elm in root.findall("./Data/CategoryNavigationViewModel"):
        label = cat_elm.findtext("Label")
        cat_url = cat_elm.findtext("Url")
        # An entry without a label or URL cannot be crawled; skip it rather
        # than crash on a missing child or store a None key/value.
        if not label or not cat_url:
            continue
        categories[label] = cat_url

    return categories
-
-
def _fetch_json(url, cookies):
    """GET *url* and return the response body parsed as JSON.

    The body is decoded as UTF-8 (undecodable bytes dropped) and passed
    through ``unidecode`` before parsing, so all text ends up ASCII-only.
    """
    response = requests.get(url, cookies=cookies)
    response.raise_for_status()
    # NOTE(review): transliteration happens on the raw JSON text, before
    # parsing; confirm the API never returns characters that unidecode would
    # turn into JSON syntax (e.g. typographic quotes).
    text = response.content.decode('utf-8', 'ignore')
    return json.loads(unidecode.unidecode(text))


def _build_product(config, product_json):
    """Translate one listing item into the ``(product, price)`` dict pair."""
    base_url = config['action']['url']

    product = {
        'title': product_json['title'],
        'url': base_url + product_json['url'],
        'image_url': base_url + product_json['imageUrl'],
        'info': product_json['subTitle'],
        'sku_code': product_json['code'],
    }
    # EAN code and long description are optional: they are only set when the
    # matching specification entry is present.
    for spec in product_json['specifications']:
        if spec['id'] == 'attEANCodeVariant':
            product['ean_code'] = spec['value']
        elif spec['id'] == 'attLongDescription':
            product['description'] = spec['value']
    product['brand'] = product_json['brandName']

    price = {
        'price': product_json['price'],
        # Only an explicit false (or 0) clears the flag; every other value,
        # including null, is recorded as a promo.
        'promo': 0 if product_json['isDeal'] == False else 1,
        'promo_start': product_json['dealStartDate'],
        'promo_end': product_json['dealEndDate'],
    }
    return product, price


def get_products(config, cat_url, kafka_producer):
    """Fetch every product of one (sub)category and publish it via ``common``.

    The listing endpoint is queried twice: once with a limit of 1 to read
    ``totalCount``, then once with that count as the limit so that all items
    arrive in a single response.

    Args:
        config: Configuration mapping; reads ``config['action']`` keys
            ``products_url``, ``products_cookie``, ``url`` and ``name``.
        cat_url: Category path substituted for ``__url__`` in the products
            URL template.
        kafka_producer: Producer handed through to ``common.add_product``.

    Returns:
        None. Items that fail to convert or publish are handed to
        ``common.dump_failed_product`` and do not stop the loop.

    Raises:
        requests.HTTPError: If either listing request returns an error status.
    """
    action = config['action']
    url = action['products_url'].replace("__url__", cat_url)
    url = url.replace("__offset__", "0")
    products_cookie = action['products_cookie']

    # Probe with a one-item page just to learn how many items exist.
    total_count = _fetch_json(url.replace("__limit__", "1"), products_cookie)['totalCount']
    if not total_count:
        # Nothing listed for this category; skip the pointless limit=0 request.
        return

    all_url = url.replace("__limit__", str(total_count))
    products_json = _fetch_json(all_url, products_cookie)

    for product_json in products_json['items']:
        try:
            # The listing mixes in non-product entries; only real products
            # are published.
            if product_json['type'] != 'product':
                continue
            product, price = _build_product(config, product_json)
            common.add_product(kafka_producer, action['name'], product, price)
        except Exception as err:
            # Best effort per item: record the failure and carry on with the
            # rest of the category.
            common.dump_failed_product(action['name'], cat_url, product_json, err, traceback.format_exc())
-
-
def main():
    """Crawl every subcategory of the configured store and publish its products.

    Registers/updates the store, clears previously recorded failures, walks
    each category page for subcategory links, publishes the products of every
    subcategory, and finally triggers the store price update. The Kafka
    producer is flushed and closed on the way out, whether or not the crawl
    succeeded.
    """
    config = common.get_config()
    action = config['action']

    producer = KafkaProducer(bootstrap_servers=[config['config']['kafka_boostrap_servers']])
    try:
        common.insert_update_store(producer, {'store': action['name'], 'url': action['url'], 'image_url': action['logo']})
        common.clear_failed_product(action['name'])

        categories = get_categories(action['categories_url'])

        for category_path in categories.values():
            soup = common.get_soup_page_no_limit(f"{action['url']}{category_path}")
            for link in soup.find_all("a", class_="subcategory-cta-list__cta"):
                href = link.get('href')
                # An anchor without a target cannot be crawled; skip it
                # instead of aborting the whole run with a KeyError.
                if not href:
                    continue
                get_products(config, href, producer)

        common.update_store_prices(producer, action)
    finally:
        # KafkaProducer sends asynchronously: without an explicit flush,
        # records still buffered at interpreter exit can be dropped.
        producer.flush()
        producer.close()


if __name__ == "__main__":
    main()
|