You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

89 lines
3.3 KiB

#!/usr/bin/env python3
import requests
import logging
import xml.etree.ElementTree as ET
import json
import unidecode
import sqlite3
import common
import traceback
import sys
from bs4 import BeautifulSoup
from datetime import datetime
from kafka import KafkaProducer
def get_categories(url, timeout=60):
    """Fetch the category navigation XML and map category labels to URLs.

    Args:
        url: Address of the XML document that lists the categories.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot hang the scraper forever.

    Returns:
        dict mapping the text of each ``Label`` element to the text of the
        ``Url`` element found in the same
        ``./Data/CategoryNavigationViewModel`` node.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        xml.etree.ElementTree.ParseError: if the response is not well-formed
            XML.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as XML.
    page.raise_for_status()
    root = ET.fromstring(page.content)

    categories = {}
    for cat_elm in root.findall("./Data/CategoryNavigationViewModel"):
        # findtext() returns None for a missing child and "" for an empty
        # one; either way the entry is unusable, so skip it rather than
        # crash on ``None.text``.
        label = cat_elm.findtext("Label")
        link = cat_elm.findtext("Url")
        if not label or not link:
            logging.warning("Skipping category entry without Label/Url in %s", url)
            continue
        categories[label] = link
    return categories
def _asciify(value):
    """Recursively transliterate every string inside decoded JSON to ASCII."""
    if isinstance(value, str):
        return unidecode.unidecode(value)
    if isinstance(value, list):
        return [_asciify(item) for item in value]
    if isinstance(value, dict):
        return {_asciify(key): _asciify(item) for key, item in value.items()}
    return value


def _fetch_json(url, cookies, timeout):
    """GET ``url`` and return its decoded JSON payload with ASCII-only strings."""
    response = requests.get(url, cookies=cookies, timeout=timeout)
    # Surface HTTP errors directly instead of failing later on a JSON parse.
    response.raise_for_status()
    # Parse first and transliterate afterwards: running unidecode over the raw
    # JSON text can turn typographic quotes inside a value into a plain '"',
    # which corrupts the document and makes json.loads() fail.
    return _asciify(json.loads(response.content.decode('utf-8', 'ignore')))


def get_products(config, cat_url, kafka_producer, timeout=60):
    """Fetch every product of one category and publish it through ``common``.

    The URL template ``config['action']['products_url']`` has its
    ``__url__``, ``__offset__`` and ``__limit__`` placeholders filled in.
    A first request with a limit of 1 reads ``totalCount``; a second request
    with that count as limit retrieves all items at once.

    Args:
        config: Configuration mapping; the ``'action'`` section must provide
            ``products_url``, ``products_cookie``, ``url`` and ``name``.
        cat_url: Category path substituted for ``__url__`` in the template.
        kafka_producer: Producer handed unchanged to ``common.add_product``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        ValueError: if a response body is not valid JSON.
        KeyError: if ``totalCount`` or ``items`` is missing from a response.

    Failures while converting a single item are not raised; they are recorded
    with ``common.dump_failed_product`` and the loop continues.
    """
    action = config['action']
    url = action['products_url']
    url = url.replace("__url__", cat_url)
    url = url.replace("__offset__", "0")
    products_cookie = action['products_cookie']

    # Probe with limit=1 just to learn how many items the category holds.
    total_url = url.replace("__limit__", "1")
    total_count = _fetch_json(total_url, products_cookie, timeout)['totalCount']
    if not total_count:
        # Nothing to fetch; avoid issuing a request with a limit of 0.
        return

    all_url = url.replace("__limit__", str(total_count))
    products_json = _fetch_json(all_url, products_cookie, timeout)

    for product_json in products_json['items']:
        try:
            if product_json['type'] != 'product':
                continue
            product = {}
            price = {}
            product['title'] = product_json['title']
            product['url'] = action['url'] + product_json['url']
            product['image_url'] = action['url'] + product_json['imageUrl']
            product['info'] = product_json['subTitle']
            product['sku_code'] = product_json['code']
            # EAN code and long description live in the specification list
            # and are only set when the matching spec id is present.
            for spec in product_json['specifications']:
                if spec['id'] == 'attEANCodeVariant':
                    product['ean_code'] = spec['value']
                if spec['id'] == 'attLongDescription':
                    product['description'] = spec['value']
            product['brand'] = product_json['brandName']
            price['price'] = product_json['price']
            # NOTE(review): kept as an equality test so that only an explicit
            # false/0 yields promo 0 (a null isDeal still yields 1, as
            # before) - confirm whether that is intended.
            price['promo'] = 0 if product_json['isDeal'] == False else 1
            price['promo_start'] = product_json['dealStartDate']
            price['promo_end'] = product_json['dealEndDate']
            common.add_product(kafka_producer, action['name'], product, price)
        except Exception as err:
            # Best effort per item: record the failure and keep going so one
            # malformed product does not abort the whole category.
            common.dump_failed_product(action['name'], cat_url, product_json, err, traceback.format_exc())
def main():
    """Scrape every subcategory of the configured store and publish results.

    Steps, in order: load the configuration, register the store, clear the
    previously recorded failed products, read the category list, visit each
    category page to find its subcategory links, push the products of every
    subcategory, and finally update the store prices. The Kafka producer is
    always closed at the end, even when a step raises.
    """
    config = common.get_config()
    action = config['action']
    # NOTE(review): 'boostrap' looks like a typo of 'bootstrap'; it is left
    # unchanged because it must match the key used in the config - confirm.
    producer = KafkaProducer(bootstrap_servers=[config['config']['kafka_boostrap_servers']])
    try:
        common.insert_update_store(producer, {'store': action['name'], 'url': action['url'], 'image_url': action['logo']})
        common.clear_failed_product(action['name'])
        categories = get_categories(action['categories_url'])
        # Only the category URLs are needed here, not their labels.
        for cat_path in categories.values():
            soup = common.get_soup_page_no_limit(f"{action['url']}{cat_path}")
            for sc in soup.find_all("a", class_="subcategory-cta-list__cta"):
                get_products(config, sc['href'], producer)
        common.update_store_prices(producer, action)
    finally:
        # Close the producer explicitly so pending messages get a chance to
        # be delivered and the connection is released on every exit path.
        producer.close()


if __name__ == "__main__":
    main()