You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

88 lines
3.3 KiB

3 years ago
  1. #!/usr/bin/env python3
  2. import requests
  3. import logging
  4. import xml.etree.ElementTree as ET
  5. import json
  6. import unidecode
  7. import sqlite3
  8. import common
  9. import traceback
  10. import sys
  11. from bs4 import BeautifulSoup
  12. from datetime import datetime
  13. from kafka import KafkaProducer
  14. def get_categories(url):
  15. page = requests.get(url)
  16. root = ET.fromstring(page.content)
  17. categories = {}
  18. for cat_elm in root.findall("./Data/CategoryNavigationViewModel"):
  19. cat_label = cat_elm.find("Label")
  20. cat_url = cat_elm.find("Url")
  21. categories[cat_label.text] = cat_url.text
  22. return categories
  23. def get_products(config, cat_url, kafka_producer):
  24. url = config['action']['products_url']
  25. url = url.replace("__url__", cat_url)
  26. url = url.replace("__offset__", "0")
  27. total_url = url.replace("__limit__", "1")
  28. products_cookie = config['action']['products_cookie']
  29. get_total = requests.get(total_url, cookies=products_cookie)
  30. data_json = json.loads(unidecode.unidecode(get_total.content.decode('utf-8', 'ignore')))
  31. totalCount = data_json['totalCount']
  32. all_url = url.replace("__limit__", str(totalCount))
  33. products = requests.get(all_url, cookies=products_cookie)
  34. products_json = json.loads(unidecode.unidecode(products.content.decode('utf-8', 'ignore')))
  35. for product_json in products_json['items']:
  36. try:
  37. if product_json['type'] != 'product':
  38. continue
  39. product = {}
  40. price = {}
  41. product['title'] = product_json['title']
  42. product['url'] = config['action']['url'] + product_json['url']
  43. product['image_url'] = config['action']['url'] + product_json['imageUrl']
  44. product['info'] = product_json['subTitle']
  45. product['sku_code'] = product_json['code']
  46. for spec in product_json['specifications']:
  47. if spec['id'] == 'attEANCodeVariant':
  48. product['ean_code'] = spec['value']
  49. if spec['id'] == 'attLongDescription':
  50. product['description'] = spec['value']
  51. product['brand'] = product_json['brandName']
  52. price['price'] = product_json['price']
  53. price['promo'] = 0 if product_json['isDeal'] == False else 1
  54. price['promo_start'] = product_json['dealStartDate']
  55. price['promo_end'] = product_json['dealEndDate']
  56. common.add_product(kafka_producer, config['action']['name'], product, price)
  57. except Exception as err:
  58. common.dump_failed_product(config['action']['name'], cat_url, product_json, err, traceback.format_exc())
  59. if __name__ == "__main__" :
  60. config = common.get_config()
  61. producer = KafkaProducer(bootstrap_servers=[config['config']['kafka_boostrap_servers']])
  62. common.insert_update_store(producer, {'store': config['action']['name'], 'url': config['action']['url'], 'image_url': config['action']['logo']})
  63. common.clear_failed_product(config['action']['name'])
  64. categories = get_categories(config['action']['categories_url'])
  65. for cat in categories:
  66. soup = common.get_soup_page_no_limit(f"{config['action']['url']}{categories[cat]}")
  67. subcat = soup.find_all("a", class_="subcategory-cta-list__cta")
  68. for sc in subcat:
  69. get_products(config, sc['href'], producer)
  70. common.update_store_prices(producer, config['action'])