kalathma - Python

Lasanthajn

Well-known member
  • Nov 26, 2007
    1,612
    1,458
    113
    comments ටික මේක දාගෙන දැම්මම බාපල්ලා

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException, NoSuchElementException
    import time
    import csv
    # Target TikTok post whose comments will be scraped.
    url = "https://www.tiktok.com/@rizzcado/photo/7491129701956652306"

    # Build the Chrome configuration. Headless mode is intentionally left off so
    # the browser window stays visible while debugging; add "--headless" to the
    # tuple below to run without a window.
    chrome_options = Options()
    for chrome_flag in (
        "--disable-gpu",
        "--mute-audio",
        "--window-size=1920,1080",  # fixed window size
        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    ):
        chrome_options.add_argument(chrome_flag)

    # Start Chrome and cap page loads at 30 seconds.
    driver = webdriver.Chrome(options=chrome_options)
    driver.set_page_load_timeout(30)

    # Open the post, then pause for a fixed 5 seconds to give the page time
    # to finish loading.
    print(f"Opening TikTok URL: {url}")
    driver.get(url)
    time.sleep(5)
    # Function to scroll down to load more comments
    def scroll_to_load_comments(driver, max_scrolls=15, scroll_pause=3, click_pause=2):
        """Scroll the page repeatedly so that more comments get loaded.

        Each pass scrolls to the bottom of the document, waits, and compares
        the document height with the previous pass. When the height did not
        change, every element whose text contains "Load more" or "View more"
        is clicked via JavaScript. If no such element is found (or the lookup
        fails) and more than three passes have already completed, scrolling
        stops early.

        Args:
            driver: Selenium WebDriver, or any object exposing
                ``execute_script`` and ``find_elements``.
            max_scrolls: Upper bound on the number of scroll passes.
            scroll_pause: Seconds to wait after each scroll.
            click_pause: Seconds to wait after each "load more" click.
        """
        print("Starting to scroll to load comments...")
        last_height = driver.execute_script("return document.body.scrollHeight")
        scroll_count = 0

        while scroll_count < max_scrolls:
            # Scroll down to the bottom and give the page time to react.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause)

            # An unchanged height means the scroll loaded nothing new.
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # Only the lookup is guarded; a failed lookup is treated the
                # same as "no button present".
                try:
                    load_buttons = driver.find_elements(By.XPATH, "//*[contains(text(), 'Load more') or contains(text(), 'View more')]")
                except Exception:
                    load_buttons = []

                if load_buttons:
                    for button in load_buttons:
                        # Best effort: a button that cannot be clicked must
                        # not abort the whole scroll loop.
                        try:
                            print("Clicking load more button...")
                            driver.execute_script("arguments[0].click();", button)
                            time.sleep(click_pause)
                        except Exception as exc:
                            print(f"Could not click load more button: {exc}")
                elif scroll_count > 3:
                    # Scrolled a few times with no new content and nothing
                    # to click: give up.
                    break

            last_height = new_height
            scroll_count += 1
            print(f"Completed scroll {scroll_count}/{max_scrolls}")
    # Try to handle cookie consent if it appears. This is best effort: a
    # failure here must never stop the scrape.
    try:
        cookie_buttons = driver.find_elements(By.XPATH, "//*[contains(text(), 'Accept') or contains(text(), 'Accept all')]")
    except Exception as exc:
        print(f"Could not look for cookie consent button: {exc}")
        cookie_buttons = []

    for button in cookie_buttons:
        # The XPath matches any element whose text contains "Accept", so some
        # matches may not be clickable; skip those and keep going.
        try:
            button.click()
            print("Clicked cookie consent button")
            time.sleep(1)
        except Exception:
            continue
    # Scroll to load comments
    scroll_to_load_comments(driver)

    # Candidate CSS selectors for one comment container, tried in order.
    possible_comment_selectors = [
        ".tiktok-x6o8yi-DivCommentItem",
        "[data-e2e='comment-item']",
        ".comment-item",
        ".tiktok-comment-item",
        "div[class*='CommentItem']",
    ]

    # Candidate selectors (searched inside a comment container) for the
    # author name and for the comment body. Defined once, outside the loop.
    username_selectors = [
        'a[data-e2e="comment-username"]',
        'span[data-e2e="comment-username"]',
        '.nickname',
        'a.user-nickname',
        'div[class*="Author"] span',
    ]
    comment_selectors = [
        'p[data-e2e="comment-level"]',
        'span[data-e2e="comment-text"]',
        '.comment-text',
        'div[class*="CommentText"]',
    ]


    def _first_nonempty_text(element, css_selectors):
        """Return the stripped text of the first matching child that has text.

        Selectors are tried in order; a selector that matches nothing or
        matches an element with empty text is skipped. Returns None when no
        selector yields text.
        """
        for css_selector in css_selectors:
            try:
                text = element.find_element(By.CSS_SELECTOR, css_selector).text.strip()
            except NoSuchElementException:
                continue
            if text:
                return text
        return None


    comments = []
    used_selector = None
    comment_elements = []
    for selector in possible_comment_selectors:
        print(f"Trying to find comments with selector: {selector}")
        comment_elements = driver.find_elements(By.CSS_SELECTOR, selector)

        if comment_elements:
            used_selector = selector
            print(f"Found {len(comment_elements)} comments using selector: {selector}")
            break

    if used_selector:
        # comment_elements still holds the matches for used_selector, so no
        # second query is needed.
        for element in comment_elements:
            try:
                user_name = _first_nonempty_text(element, username_selectors)
                comment_text = _first_nonempty_text(element, comment_selectors)
            except Exception as e:
                # Any other failure on this element: report it and move on.
                print(f"Error extracting comment: {e}")
                continue

            if user_name and comment_text:
                comments.append([user_name, comment_text])
                print(f"Extracted comment from {user_name}: {comment_text[:30]}...")
    else:
        # If no comments found with CSS selectors, try XPath as a fallback
        print("No comments found with CSS selectors, trying XPath...")
        try:
            comment_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'Comment') or contains(@class, 'comment')]")
        except Exception as e:
            print(f"Error with XPath approach: {e}")
            comment_elements = []
        else:
            print(f"Found {len(comment_elements)} comments using XPath")

        for element in comment_elements:
            try:
                full_text = element.text
            except Exception as e:
                print(f"Error extracting comment with XPath: {e}")
                continue

            # Simple text processing: everything before the first ":" is taken
            # as the username, the rest as the comment.
            user_name, separator, comment_text = full_text.partition(":")
            if separator:
                user_name = user_name.strip()
                comment_text = comment_text.strip()
                comments.append([user_name, comment_text])
                print(f"Extracted comment via text processing: {user_name}: {comment_text[:30]}...")
    # Take a screenshot for debugging purposes, then close the browser. The
    # finally clause guarantees the Chrome process is shut down even if the
    # screenshot call raises.
    screenshot_file = "tiktok_screenshot.png"
    try:
        # save_screenshot reports an I/O failure through its return value,
        # so only announce success when it returns a truthy result.
        if driver.save_screenshot(screenshot_file):
            print(f"Saved screenshot to {screenshot_file} for debugging")
        else:
            print(f"Could not save screenshot to {screenshot_file}")
        # Print summary
        print(f"Total comments extracted: {len(comments)}")
    finally:
        # Close the driver
        driver.quit()

    # Write comments to CSV file (header row first, then one row per comment).
    csv_file = "tiktok_comments.csv"
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Username", "Comment"])
        writer.writerows(comments)
    print(f"Comments saved to {csv_file}")
     

    VimangaLK

    Well-known member
  • Sep 26, 2024
    998
    1,524
    93
    Python:
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException, NoSuchElementException
    import time
    import csv
    # Target TikTok post whose comments will be scraped.
    url = "https://www.tiktok.com/@rizzcado/photo/7491129701956652306"

    # Build the Chrome configuration. Headless mode is intentionally left off so
    # the browser window stays visible while debugging; add "--headless" to the
    # tuple below to run without a window.
    chrome_options = Options()
    for chrome_flag in (
        "--disable-gpu",
        "--mute-audio",
        "--window-size=1920,1080",  # fixed window size
        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    ):
        chrome_options.add_argument(chrome_flag)

    # Start Chrome and cap page loads at 30 seconds.
    driver = webdriver.Chrome(options=chrome_options)
    driver.set_page_load_timeout(30)

    # Open the post, then pause for a fixed 5 seconds to give the page time
    # to finish loading.
    print(f"Opening TikTok URL: {url}")
    driver.get(url)
    time.sleep(5)
    # Function to scroll down to load more comments
    def scroll_to_load_comments(driver, max_scrolls=15, scroll_pause=3, click_pause=2):
        """Scroll the page repeatedly so that more comments get loaded.

        Each pass scrolls to the bottom of the document, waits, and compares
        the document height with the previous pass. When the height did not
        change, every element whose text contains "Load more" or "View more"
        is clicked via JavaScript. If no such element is found (or the lookup
        fails) and more than three passes have already completed, scrolling
        stops early.

        Args:
            driver: Selenium WebDriver, or any object exposing
                ``execute_script`` and ``find_elements``.
            max_scrolls: Upper bound on the number of scroll passes.
            scroll_pause: Seconds to wait after each scroll.
            click_pause: Seconds to wait after each "load more" click.
        """
        print("Starting to scroll to load comments...")
        last_height = driver.execute_script("return document.body.scrollHeight")
        scroll_count = 0

        while scroll_count < max_scrolls:
            # Scroll down to the bottom and give the page time to react.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause)

            # An unchanged height means the scroll loaded nothing new.
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # Only the lookup is guarded; a failed lookup is treated the
                # same as "no button present".
                try:
                    load_buttons = driver.find_elements(By.XPATH, "//*[contains(text(), 'Load more') or contains(text(), 'View more')]")
                except Exception:
                    load_buttons = []

                if load_buttons:
                    for button in load_buttons:
                        # Best effort: a button that cannot be clicked must
                        # not abort the whole scroll loop.
                        try:
                            print("Clicking load more button...")
                            driver.execute_script("arguments[0].click();", button)
                            time.sleep(click_pause)
                        except Exception as exc:
                            print(f"Could not click load more button: {exc}")
                elif scroll_count > 3:
                    # Scrolled a few times with no new content and nothing
                    # to click: give up.
                    break

            last_height = new_height
            scroll_count += 1
            print(f"Completed scroll {scroll_count}/{max_scrolls}")
    # Try to handle cookie consent if it appears. This is best effort: a
    # failure here must never stop the scrape.
    try:
        cookie_buttons = driver.find_elements(By.XPATH, "//*[contains(text(), 'Accept') or contains(text(), 'Accept all')]")
    except Exception as exc:
        print(f"Could not look for cookie consent button: {exc}")
        cookie_buttons = []

    for button in cookie_buttons:
        # The XPath matches any element whose text contains "Accept", so some
        # matches may not be clickable; skip those and keep going.
        try:
            button.click()
            print("Clicked cookie consent button")
            time.sleep(1)
        except Exception:
            continue
    # Scroll to load comments
    scroll_to_load_comments(driver)

    # Candidate CSS selectors for one comment container, tried in order.
    possible_comment_selectors = [
        ".tiktok-x6o8yi-DivCommentItem",
        "[data-e2e='comment-item']",
        ".comment-item",
        ".tiktok-comment-item",
        "div[class*='CommentItem']",
    ]

    # Candidate selectors (searched inside a comment container) for the
    # author name and for the comment body. Defined once, outside the loop.
    username_selectors = [
        'a[data-e2e="comment-username"]',
        'span[data-e2e="comment-username"]',
        '.nickname',
        'a.user-nickname',
        'div[class*="Author"] span',
    ]
    comment_selectors = [
        'p[data-e2e="comment-level"]',
        'span[data-e2e="comment-text"]',
        '.comment-text',
        'div[class*="CommentText"]',
    ]


    def _first_nonempty_text(element, css_selectors):
        """Return the stripped text of the first matching child that has text.

        Selectors are tried in order; a selector that matches nothing or
        matches an element with empty text is skipped. Returns None when no
        selector yields text.
        """
        for css_selector in css_selectors:
            try:
                text = element.find_element(By.CSS_SELECTOR, css_selector).text.strip()
            except NoSuchElementException:
                continue
            if text:
                return text
        return None


    comments = []
    used_selector = None
    comment_elements = []
    for selector in possible_comment_selectors:
        print(f"Trying to find comments with selector: {selector}")
        comment_elements = driver.find_elements(By.CSS_SELECTOR, selector)

        if comment_elements:
            used_selector = selector
            print(f"Found {len(comment_elements)} comments using selector: {selector}")
            break

    if used_selector:
        # comment_elements still holds the matches for used_selector, so no
        # second query is needed.
        for element in comment_elements:
            try:
                user_name = _first_nonempty_text(element, username_selectors)
                comment_text = _first_nonempty_text(element, comment_selectors)
            except Exception as e:
                # Any other failure on this element: report it and move on.
                print(f"Error extracting comment: {e}")
                continue

            if user_name and comment_text:
                comments.append([user_name, comment_text])
                print(f"Extracted comment from {user_name}: {comment_text[:30]}...")
    else:
        # If no comments found with CSS selectors, try XPath as a fallback
        print("No comments found with CSS selectors, trying XPath...")
        try:
            comment_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'Comment') or contains(@class, 'comment')]")
        except Exception as e:
            print(f"Error with XPath approach: {e}")
            comment_elements = []
        else:
            print(f"Found {len(comment_elements)} comments using XPath")

        for element in comment_elements:
            try:
                full_text = element.text
            except Exception as e:
                print(f"Error extracting comment with XPath: {e}")
                continue

            # Simple text processing: everything before the first ":" is taken
            # as the username, the rest as the comment.
            user_name, separator, comment_text = full_text.partition(":")
            if separator:
                user_name = user_name.strip()
                comment_text = comment_text.strip()
                comments.append([user_name, comment_text])
                print(f"Extracted comment via text processing: {user_name}: {comment_text[:30]}...")
    # Take a screenshot for debugging purposes, then close the browser. The
    # finally clause guarantees the Chrome process is shut down even if the
    # screenshot call raises.
    screenshot_file = "tiktok_screenshot.png"
    try:
        # save_screenshot reports an I/O failure through its return value,
        # so only announce success when it returns a truthy result.
        if driver.save_screenshot(screenshot_file):
            print(f"Saved screenshot to {screenshot_file} for debugging")
        else:
            print(f"Could not save screenshot to {screenshot_file}")
        # Print summary
        print(f"Total comments extracted: {len(comments)}")
    finally:
        # Close the driver
        driver.quit()

    # Write comments to CSV file (header row first, then one row per comment).
    csv_file = "tiktok_comments.csv"
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Username", "Comment"])
        writer.writerows(comments)
    print(f"Comments saved to {csv_file}")
     

    TNHM

    Well-known member
  • Jan 3, 2017
    5,112
    14,737
    113
    API එකක් නැද්ද tiktok එකේ
    ඕකෙන් comments scrape වෙන්න දවස් ගානක් යයි
     
    • Like
    Reactions: Cenon

    Truth seeker 2020

    Well-known member
  • Sep 29, 2019
    975
    1,467
    93
    Download කරගත්තු එකෙක් මෙතැන දාපල්ලා. හැබැයි comments, replies එකතැන තියෙනවා නම් තමයි හොඳ.
     

    kavindagk

    Well-known member
  • Dec 18, 2008
    934
    958
    93
    Colombo
    1744772578967.png