# Note (translated from Sinhala): set this script up and run it to download the comments.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import csv
# URL of the TikTok post whose comments are scraped.
url = "https://www.tiktok.com/@rizzcado/photo/7491129701956652306"

# Chrome options. Headless mode is left disabled, so a browser window is
# shown; uncomment the line below to run without a window.
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--mute-audio")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument(
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)

# Start the browser.
driver = webdriver.Chrome(options=chrome_options)

try:
    # Give up on the navigation if the page takes longer than 30 seconds.
    driver.set_page_load_timeout(30)
    print(f"Opening TikTok URL: {url}")
    driver.get(url)
    # Fixed pause after navigation before the page is inspected.
    time.sleep(5)
except BaseException:
    # The browser is already running at this point; close it before
    # re-raising so a failed or interrupted load does not leave it behind.
    driver.quit()
    raise
# Function to scroll down to load more comments
def scroll_to_load_comments(driver, max_scrolls=15, scroll_pause=3, click_pause=2):
    """Scroll the page to the bottom repeatedly so more comments get loaded.

    Each round scrolls the window to the bottom, waits, and compares
    ``document.body.scrollHeight`` with the value from the previous round.
    When the height did not change, every element whose text contains
    "Load more" or "View more" is clicked through JavaScript. If no such
    element is found (or the lookup fails) and more than three rounds have
    already completed, scrolling stops early.

    Args:
        driver: Selenium WebDriver with the target page already open.
        max_scrolls: Upper bound on the number of scroll rounds.
        scroll_pause: Seconds to wait after each scroll.
        click_pause: Seconds to wait after each "load more" click.
    """
    print("Starting to scroll to load comments...")
    last_height = driver.execute_script("return document.body.scrollHeight")
    scroll_count = 0
    while scroll_count < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # The page did not grow: look for an explicit "load more" control.
            try:
                load_buttons = driver.find_elements(
                    By.XPATH,
                    "//*[contains(text(), 'Load more') or contains(text(), 'View more')]",
                )
            except Exception as exc:
                # Best effort: a failed lookup is treated like "no buttons".
                print(f"Could not look for load more buttons: {exc}")
                load_buttons = []

            for button in load_buttons:
                try:
                    print("Clicking load more button...")
                    driver.execute_script("arguments[0].click();", button)
                    time.sleep(click_pause)
                except Exception as exc:
                    # Catch Exception rather than everything so Ctrl-C during
                    # the pause still stops the script.
                    print(f"Could not click load more button: {exc}")

            # Nothing left to click and several rounds already done: stop.
            if not load_buttons and scroll_count > 3:
                break

        last_height = new_height
        scroll_count += 1
        print(f"Completed scroll {scroll_count}/{max_scrolls}")
# Main flow: dismiss the cookie banner, scroll to load comments, extract
# (username, comment) pairs, take a screenshot, close the browser and write
# the pairs to a CSV file.

# CSS selectors tried in order to locate the comment containers.
possible_comment_selectors = [
    ".tiktok-x6o8yi-DivCommentItem",
    "[data-e2e='comment-item']",
    ".comment-item",
    ".tiktok-comment-item",
    "div[class*='CommentItem']",
]
# CSS selectors tried in order, inside one comment container, for the author.
username_selectors = [
    'a[data-e2e="comment-username"]',
    'span[data-e2e="comment-username"]',
    '.nickname',
    'a.user-nickname',
    'div[class*="Author"] span',
]
# CSS selectors tried in order, inside one comment container, for the text.
comment_selectors = [
    'p[data-e2e="comment-level"]',
    'span[data-e2e="comment-text"]',
    '.comment-text',
    'div[class*="CommentText"]',
]

comments = []
used_selector = None
comment_elements = []

try:
    # Try to handle cookie consent if it appears (best effort).
    try:
        cookie_buttons = driver.find_elements(
            By.XPATH,
            "//*[contains(text(), 'Accept') or contains(text(), 'Accept all')]",
        )
    except Exception as exc:
        print(f"Could not look for cookie consent buttons: {exc}")
        cookie_buttons = []
    for button in cookie_buttons:
        try:
            button.click()
            print("Clicked cookie consent button")
            time.sleep(1)
        except Exception:
            # The XPath matches any element containing the text, so some
            # matches may not be clickable; skip those and keep going.
            continue

    # Scroll to load comments.
    scroll_to_load_comments(driver)

    # Use the first container selector that matches anything on the page.
    for selector in possible_comment_selectors:
        print(f"Trying to find comments with selector: {selector}")
        comment_elements = driver.find_elements(By.CSS_SELECTOR, selector)
        if comment_elements:
            used_selector = selector
            print(f"Found {len(comment_elements)} comments using selector: {selector}")
            break

    if used_selector:
        for element in comment_elements:
            try:
                # First selector that yields non-empty text wins.
                user_name = None
                for username_selector in username_selectors:
                    try:
                        user_name_element = element.find_element(
                            By.CSS_SELECTOR, username_selector
                        )
                    except NoSuchElementException:
                        continue
                    user_name = user_name_element.text.strip()
                    if user_name:
                        break

                comment_text = None
                for comment_selector in comment_selectors:
                    try:
                        comment_text_element = element.find_element(
                            By.CSS_SELECTOR, comment_selector
                        )
                    except NoSuchElementException:
                        continue
                    comment_text = comment_text_element.text.strip()
                    if comment_text:
                        break

                # Keep only comments where both parts were found.
                if user_name and comment_text:
                    comments.append([user_name, comment_text])
                    print(f"Extracted comment from {user_name}: {comment_text[:30]}...")
            except Exception as e:
                # One broken comment must not abort the whole extraction.
                print(f"Error extracting comment: {e}")
    else:
        # If no comments found with CSS selectors, try XPath as a fallback.
        print("No comments found with CSS selectors, trying XPath...")
        try:
            comment_elements = driver.find_elements(
                By.XPATH,
                "//div[contains(@class, 'Comment') or contains(@class, 'comment')]",
            )
            print(f"Found {len(comment_elements)} comments using XPath")
            for element in comment_elements:
                try:
                    full_text = element.text
                    # Heuristic: text before the first ":" is taken as the
                    # username, the rest as the comment.
                    if ":" in full_text:
                        name_part, _, text_part = full_text.partition(":")
                        user_name = name_part.strip()
                        comment_text = text_part.strip()
                        comments.append([user_name, comment_text])
                        print(f"Extracted comment via text processing: {user_name}: {comment_text[:30]}...")
                except Exception as e:
                    print(f"Error extracting comment with XPath: {e}")
        except Exception as e:
            print(f"Error with XPath approach: {e}")

    # Take a screenshot for debugging purposes.
    screenshot_file = "tiktok_screenshot.png"
    driver.save_screenshot(screenshot_file)
    print(f"Saved screenshot to {screenshot_file} for debugging")

    # Print summary.
    print(f"Total comments extracted: {len(comments)}")
finally:
    # Always close the browser, even when a step above raised.
    driver.quit()

# Write comments to CSV file (header row first).
csv_file = "tiktok_comments.csv"
with open(csv_file, mode='w', newline='', encoding='utf-8') as csv_handle:
    writer = csv.writer(csv_handle)
    writer.writerow(["Username", "Comment"])
    writer.writerows(comments)
print(f"Comments saved to {csv_file}")
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import csv
# NOTE(review): from the import lines just above to the end of the file, the
# script repeats the earlier part of this file verbatim. Run as-is, the same
# post is scraped a second time and the same CSV and screenshot files are
# rewritten. Consider deleting one of the two copies.

# URL of the TikTok post whose comments are scraped.
url = "https://www.tiktok.com/@rizzcado/photo/7491129701956652306"

# Chrome options. Headless mode is left disabled, so a browser window is
# shown; uncomment the line below to run without a window.
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--mute-audio")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument(
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)

# Start the browser.
driver = webdriver.Chrome(options=chrome_options)

try:
    # Give up on the navigation if the page takes longer than 30 seconds.
    driver.set_page_load_timeout(30)
    print(f"Opening TikTok URL: {url}")
    driver.get(url)
    # Fixed pause after navigation before the page is inspected.
    time.sleep(5)
except BaseException:
    # The browser is already running at this point; close it before
    # re-raising so a failed or interrupted load does not leave it behind.
    driver.quit()
    raise
# Function to scroll down to load more comments
def scroll_to_load_comments(driver, max_scrolls=15, scroll_pause=3, click_pause=2):
    """Scroll the page to the bottom repeatedly so more comments get loaded.

    Each round scrolls the window to the bottom, waits, and compares
    ``document.body.scrollHeight`` with the value from the previous round.
    When the height did not change, every element whose text contains
    "Load more" or "View more" is clicked through JavaScript. If no such
    element is found (or the lookup fails) and more than three rounds have
    already completed, scrolling stops early.

    Args:
        driver: Selenium WebDriver with the target page already open.
        max_scrolls: Upper bound on the number of scroll rounds.
        scroll_pause: Seconds to wait after each scroll.
        click_pause: Seconds to wait after each "load more" click.
    """
    print("Starting to scroll to load comments...")
    last_height = driver.execute_script("return document.body.scrollHeight")
    scroll_count = 0
    while scroll_count < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # The page did not grow: look for an explicit "load more" control.
            try:
                load_buttons = driver.find_elements(
                    By.XPATH,
                    "//*[contains(text(), 'Load more') or contains(text(), 'View more')]",
                )
            except Exception as exc:
                # Best effort: a failed lookup is treated like "no buttons".
                print(f"Could not look for load more buttons: {exc}")
                load_buttons = []

            for button in load_buttons:
                try:
                    print("Clicking load more button...")
                    driver.execute_script("arguments[0].click();", button)
                    time.sleep(click_pause)
                except Exception as exc:
                    # Catch Exception rather than everything so Ctrl-C during
                    # the pause still stops the script.
                    print(f"Could not click load more button: {exc}")

            # Nothing left to click and several rounds already done: stop.
            if not load_buttons and scroll_count > 3:
                break

        last_height = new_height
        scroll_count += 1
        print(f"Completed scroll {scroll_count}/{max_scrolls}")
# Main flow: dismiss the cookie banner, scroll to load comments, extract
# (username, comment) pairs, take a screenshot, close the browser and write
# the pairs to a CSV file.

# CSS selectors tried in order to locate the comment containers.
possible_comment_selectors = [
    ".tiktok-x6o8yi-DivCommentItem",
    "[data-e2e='comment-item']",
    ".comment-item",
    ".tiktok-comment-item",
    "div[class*='CommentItem']",
]
# CSS selectors tried in order, inside one comment container, for the author.
username_selectors = [
    'a[data-e2e="comment-username"]',
    'span[data-e2e="comment-username"]',
    '.nickname',
    'a.user-nickname',
    'div[class*="Author"] span',
]
# CSS selectors tried in order, inside one comment container, for the text.
comment_selectors = [
    'p[data-e2e="comment-level"]',
    'span[data-e2e="comment-text"]',
    '.comment-text',
    'div[class*="CommentText"]',
]

comments = []
used_selector = None
comment_elements = []

try:
    # Try to handle cookie consent if it appears (best effort).
    try:
        cookie_buttons = driver.find_elements(
            By.XPATH,
            "//*[contains(text(), 'Accept') or contains(text(), 'Accept all')]",
        )
    except Exception as exc:
        print(f"Could not look for cookie consent buttons: {exc}")
        cookie_buttons = []
    for button in cookie_buttons:
        try:
            button.click()
            print("Clicked cookie consent button")
            time.sleep(1)
        except Exception:
            # The XPath matches any element containing the text, so some
            # matches may not be clickable; skip those and keep going.
            continue

    # Scroll to load comments.
    scroll_to_load_comments(driver)

    # Use the first container selector that matches anything on the page.
    for selector in possible_comment_selectors:
        print(f"Trying to find comments with selector: {selector}")
        comment_elements = driver.find_elements(By.CSS_SELECTOR, selector)
        if comment_elements:
            used_selector = selector
            print(f"Found {len(comment_elements)} comments using selector: {selector}")
            break

    if used_selector:
        for element in comment_elements:
            try:
                # First selector that yields non-empty text wins.
                user_name = None
                for username_selector in username_selectors:
                    try:
                        user_name_element = element.find_element(
                            By.CSS_SELECTOR, username_selector
                        )
                    except NoSuchElementException:
                        continue
                    user_name = user_name_element.text.strip()
                    if user_name:
                        break

                comment_text = None
                for comment_selector in comment_selectors:
                    try:
                        comment_text_element = element.find_element(
                            By.CSS_SELECTOR, comment_selector
                        )
                    except NoSuchElementException:
                        continue
                    comment_text = comment_text_element.text.strip()
                    if comment_text:
                        break

                # Keep only comments where both parts were found.
                if user_name and comment_text:
                    comments.append([user_name, comment_text])
                    print(f"Extracted comment from {user_name}: {comment_text[:30]}...")
            except Exception as e:
                # One broken comment must not abort the whole extraction.
                print(f"Error extracting comment: {e}")
    else:
        # If no comments found with CSS selectors, try XPath as a fallback.
        print("No comments found with CSS selectors, trying XPath...")
        try:
            comment_elements = driver.find_elements(
                By.XPATH,
                "//div[contains(@class, 'Comment') or contains(@class, 'comment')]",
            )
            print(f"Found {len(comment_elements)} comments using XPath")
            for element in comment_elements:
                try:
                    full_text = element.text
                    # Heuristic: text before the first ":" is taken as the
                    # username, the rest as the comment.
                    if ":" in full_text:
                        name_part, _, text_part = full_text.partition(":")
                        user_name = name_part.strip()
                        comment_text = text_part.strip()
                        comments.append([user_name, comment_text])
                        print(f"Extracted comment via text processing: {user_name}: {comment_text[:30]}...")
                except Exception as e:
                    print(f"Error extracting comment with XPath: {e}")
        except Exception as e:
            print(f"Error with XPath approach: {e}")

    # Take a screenshot for debugging purposes.
    screenshot_file = "tiktok_screenshot.png"
    driver.save_screenshot(screenshot_file)
    print(f"Saved screenshot to {screenshot_file} for debugging")

    # Print summary.
    print(f"Total comments extracted: {len(comments)}")
finally:
    # Always close the browser, even when a step above raised.
    driver.quit()

# Write comments to CSV file (header row first).
csv_file = "tiktok_comments.csv"
with open(csv_file, mode='w', newline='', encoding='utf-8') as csv_handle:
    writer = csv.writer(csv_handle)
    writer.writerow(["Username", "Comment"])
    writer.writerows(comments)
print(f"Comments saved to {csv_file}")