"""Find company sites that show no cookie banner and collect messenger links.

Reads companies from INPUT_FILE, opens each site in headless Chrome, and for
every site whose visible page text contains none of COOKIE_KEYWORDS records
the company data plus any WhatsApp / Telegram links into OUTPUT_FILE.
"""
import time
from urllib.parse import urlparse

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# ====== SETTINGS ======
INPUT_FILE = "москва уборка офиса.xlsx"
OUTPUT_FILE = "sites_without_cookie_banner.xlsx"
WAIT_TIME = 5  # seconds to let the page render after navigation
PAGE_LOAD_TIMEOUT = 30  # seconds before a hanging site is given up on
# ======================

# Keywords whose presence in the page text is treated as a cookie banner.
COOKIE_KEYWORDS = [
    "cookie",
    "куки",
    "файлы cookie",
    "согласие",
    "мы используем",
    "используем cookies",
    "политик",
]

# Hosts (and their subdomains) that identify a messenger link.
_WHATSAPP_DOMAINS = ("wa.me", "whatsapp.com")
_TELEGRAM_DOMAINS = ("t.me", "telegram.me", "telegram.org", "telegram.dog")

# Column order of the output sheet; also guarantees a header row when no
# site qualifies.
_OUTPUT_COLUMNS = [
    "Название компании",
    "Сайт",
    "Рейтинг",
    "Количество отзывов",
    "WhatsApp",
    "Telegram",
]


def has_cookie_banner(driver):
    """Return True if the page's visible body text contains a cookie keyword.

    The check is a case-insensitive substring search of COOKIE_KEYWORDS in
    the text of the <body> element. Any WebDriver failure while reading the
    page (missing body, stale element, ...) is treated as "no banner".
    """
    try:
        body_text = driver.find_element(By.TAG_NAME, "body").text.lower()
    except WebDriverException:
        return False
    return any(word in body_text for word in COOKIE_KEYWORDS)


def _split_link(href):
    """Return (scheme, hostname) of href in lower case; empty on bad URLs."""
    try:
        parsed = urlparse(href)
        return parsed.scheme.lower(), (parsed.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. broken IPv6 literals).
        return "", ""


def _host_matches(host, domains):
    """Return True if host equals one of domains or is a subdomain of it."""
    return any(host == d or host.endswith("." + d) for d in domains)


def get_messengers(driver):
    """Return (whatsapp, telegram) link URLs found on the current page.

    Every <a href> is classified by its URI scheme or hostname rather than
    by a raw substring search, so unrelated URLs that merely contain
    "t.me" / "wa.me" (e.g. "support.media") are not mistaken for messenger
    links. If several links of one kind exist, the last one wins. A kind
    that is not found is returned as an empty string.
    """
    whatsapp = ""
    telegram = ""
    for link in driver.find_elements(By.TAG_NAME, "a"):
        try:
            href = link.get_attribute("href")
        except WebDriverException:
            # The element vanished (dynamic page); skip it, keep the rest.
            continue
        if not href:
            continue
        scheme, host = _split_link(href)
        if scheme == "whatsapp" or _host_matches(host, _WHATSAPP_DOMAINS):
            whatsapp = href
        if scheme == "tg" or _host_matches(host, _TELEGRAM_DOMAINS):
            telegram = href
    return whatsapp, telegram


def _normalize_url(site):
    """Return site as a stripped string with "http://" added if no scheme."""
    url = str(site).strip()
    if url and "://" not in url:
        url = "http://" + url
    return url


def main():
    """Check every site from INPUT_FILE and write the matches to OUTPUT_FILE."""
    # Read the input first so a bad or missing file does not leave a Chrome
    # process running.
    df = pd.read_excel(INPUT_FILE)

    # Headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )

    result = []
    try:
        driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        for _, row in df.iterrows():
            company = row.get("Название")
            site = row.get("Сайт")
            rating = row.get("Рейтинг")
            reviews = row.get("Количество отзывов")

            if pd.isna(site):
                continue
            url = _normalize_url(site)
            if not url:
                continue

            try:
                driver.get(url)
                time.sleep(WAIT_TIME)
                if not has_cookie_banner(driver):
                    whatsapp, telegram = get_messengers(driver)
                    result.append({
                        "Название компании": company,
                        "Сайт": site,
                        "Рейтинг": rating,
                        "Количество отзывов": reviews,
                        "WhatsApp": whatsapp,
                        "Telegram": telegram,
                    })
                print(f"Проверено: {site}")
            except Exception as e:
                # Per-site boundary: one broken site must not stop the run.
                print(f"Ошибка: {site} ({type(e).__name__})")
    finally:
        # Always release the browser, even on Ctrl-C or unexpected errors.
        driver.quit()

    # Save the new Excel file
    result_df = pd.DataFrame(result, columns=_OUTPUT_COLUMNS)
    result_df.to_excel(OUTPUT_FILE, index=False)
    print("Готово. Файл сохранён:", OUTPUT_FILE)


if __name__ == "__main__":
    main()
# Made on Tilda