import sys
import os
from playwright.sync_api import sync_playwright

DEFAULT_OUTPUT_FILE = r"D:\cnf\chatbot_canifa\backend\datadb\cuahang.txt"
DEFAULT_STORES_URL = "https://canifa.com/cua-hang.html"


def scrape_stores(
    output_file: str = DEFAULT_OUTPUT_FILE,
    url: str = DEFAULT_STORES_URL,
) -> bool:
    """Dump the rendered text of a store-locator page to a UTF-8 text file.

    Opens ``url`` in headless Chromium, waits for the network to go idle,
    reads ``document.body.innerText`` and writes it to ``output_file``
    (overwriting any existing file). The parent directory of
    ``output_file`` is created if it does not exist.

    Args:
        output_file: Destination path for the scraped text.
        url: Page to scrape.

    Returns:
        True if the text was saved, False if any step failed. Failures are
        reported on stdout rather than raised, so this is safe to call as a
        best-effort job.
    """
    print(f"Starting scrape from {url}...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)

        try:
            # Created inside the try so the browser is still closed by the
            # finally clause if opening the page fails.
            page = browser.new_page()

            page.goto(url, timeout=60000)
            print("Page loaded.")

            # Wait for the "networkidle" load state before reading the DOM,
            # so content fetched after the initial load has a chance to
            # arrive.
            page.wait_for_load_state("networkidle")

            # No store-specific selector is known, so take the whole body
            # text and leave parsing to the consumer of the file.
            content = page.evaluate("() => document.body.innerText")

            # open(..., "w") does not create missing directories.
            out_dir = os.path.dirname(output_file)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

            with open(output_file, "w", encoding="utf-8") as f:
                f.write(content)

            print(f"Successfully saved data to {output_file}")
            return True

        except Exception as e:
            # Top-level boundary of a best-effort job: report and signal
            # failure through the return value instead of crashing.
            print(f"Error scraping: {e}")
            return False

        finally:
            browser.close()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_stores()
