import time
from urllib.parse import urlsplit

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
# ====== SETTINGS ======
# Excel workbook read as input. The main loop reads the columns
# "Название", "Сайт", "Рейтинг" and "Количество отзывов" from each row.
INPUT_FILE = "москва уборка офиса.xlsx"
# Excel workbook written at the end with the sites on which no cookie
# banner was detected.
OUTPUT_FILE = "sites_without_cookie_banner.xlsx"
# Seconds to sleep after opening a site before its page is inspected.
WAIT_TIME = 5
# =======================
# Substrings that mark a page as having a cookie banner. They are matched
# against the lower-cased text of the page <body>, so every entry must be
# lower case. Several entries are already covered by the shorter "cookie"
# entry; they are kept as-is so the list stays explicit.
COOKIE_KEYWORDS = [
    "cookie",
    "куки",
    "файлы cookie",
    "согласие",
    "мы используем",
    "используем cookies",
    "политик",
]
def has_cookie_banner(driver):
    """Return True if the page text contains any cookie-banner keyword.

    The check is a plain substring search of every entry of
    ``COOKIE_KEYWORDS`` in the lower-cased text of the ``<body>`` element.

    Args:
        driver: Selenium WebDriver with the page to inspect already loaded.

    Returns:
        True when at least one keyword occurs in the body text. False when
        none does, or when the body text could not be read.
    """
    try:
        body_text = driver.find_element(By.TAG_NAME, "body").text.lower()
    except Exception:
        # Best effort: a page whose body cannot be read is treated as having
        # no banner. `Exception` (not a bare `except`) so that Ctrl-C and
        # SystemExit still stop the run instead of being swallowed here.
        return False
    return any(word in body_text for word in COOKIE_KEYWORDS)
def _host_matches(host, domain):
    """Return True if *host* is *domain* itself or one of its subdomains."""
    return host == domain or host.endswith("." + domain)


def get_messengers(driver):
    """Collect WhatsApp and Telegram links from the currently loaded page.

    Every ``<a>`` element is inspected. A link counts as WhatsApp when its
    host is ``wa.me`` (or a subdomain) or its href contains ``whatsapp``;
    it counts as Telegram when its host is ``t.me`` (or a subdomain) or its
    href contains ``telegram``.

    The short domains are compared against the parsed hostname rather than
    searched as substrings: a substring test for ``t.me`` also matches
    unrelated URLs such as ``https://smart.megaclean.ru/``.

    Args:
        driver: Selenium WebDriver with the page to inspect already loaded.

    Returns:
        A ``(whatsapp, telegram)`` tuple of href strings. Each item is ``""``
        when no matching link exists; when several links match, the last
        one found on the page is returned.
    """
    whatsapp = ""
    telegram = ""
    for link in driver.find_elements(By.TAG_NAME, "a"):
        href = link.get_attribute("href")
        if not href:
            continue
        try:
            host = urlsplit(href).hostname or ""
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): no usable host,
            # so only the keyword checks below can match.
            host = ""
        if _host_matches(host, "wa.me") or "whatsapp" in href:
            whatsapp = href
        if _host_matches(host, "t.me") or "telegram" in href:
            telegram = href
    return whatsapp, telegram
# Read the input workbook first: if the file is missing or unreadable the
# script stops before a Chrome process has been started.
df = pd.read_excel(INPUT_FILE)

# Headless Chrome
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)

# Column order of the output workbook. Passing it to DataFrame explicitly
# keeps the header row even when no site ends up in the result.
output_columns = [
    "Название компании",
    "Сайт",
    "Рейтинг",
    "Количество отзывов",
    "WhatsApp",
    "Telegram",
]

result = []
try:
    for _, row in df.iterrows():
        company = row.get("Название")
        site = row.get("Сайт")
        rating = row.get("Рейтинг")
        reviews = row.get("Количество отзывов")
        if pd.isna(site):
            continue

        # The cell may hold a bare domain ("example.ru"); the browser needs
        # an absolute URL, so add a scheme when none is present.
        url = str(site).strip()
        if not url:
            continue
        if "://" not in url:
            url = f"http://{url}"

        try:
            driver.get(url)
            # Give scripts on the page time to render a late banner.
            time.sleep(WAIT_TIME)
            if not has_cookie_banner(driver):
                whatsapp, telegram = get_messengers(driver)
                result.append({
                    "Название компании": company,
                    "Сайт": site,
                    "Рейтинг": rating,
                    "Количество отзывов": reviews,
                    "WhatsApp": whatsapp,
                    "Telegram": telegram,
                })
            print(f"Проверено: {site}")
        except Exception as e:
            # One broken site must not abort the whole run; report which
            # kind of error occurred so the failure can be diagnosed.
            print(f"Ошибка: {site} ({type(e).__name__})")
finally:
    # Always close the browser, including on Ctrl-C or an unexpected error.
    driver.quit()

# Save the new Excel workbook
result_df = pd.DataFrame(result, columns=output_columns)
result_df.to_excel(OUTPUT_FILE, index=False)
print("Готово. Файл сохранён:", OUTPUT_FILE)