Wednesday, 22 February 2023

Stale Element - Selenium - Python

So I'll start by saying that this has became such a mess with me trying to solve this issue, other times I have been able to resolve the stale element issue.

Problem all starts after the first players stats are stored ( Everything it should be doing up this point ), and then once it goes back to loop and find the next player we have the issue.

I'm not sure if its caused by the nested loops or what. I try reinstating the variable that is giving me the issues I assume all throughout the code. player_stats

The thing is I did have it previously going through 5 players, and I am not sure what happened, or when the bug first established itself lol, as I was working on getting the rounds won, and played situated.

(We aren't even able to print("Found playerCol element") on the second go around)

All print statements works till it hangs in the while loop after the first iteration.

Here is the full code (with comments):

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions importStaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
import re
import time

# Initialize the webdriver
driver = webdriver.Firefox()

# Navigate to the website
url = "https://www.hltv.org/stats/players"
driver.get(url)

WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()

# Find the elements containing the player statistics
player_stats = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".playerCol, .statsDetail"))
)


# Extract the relevant data from the elements
players = []

for i, player_stat in enumerate(player_stats):
    try:
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".playerCol, .statsDetail")))
        while True:
            player_stats = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".playerCol, .statsDetail")))
            try:    
                if "playerCol" in player_stat.get_attribute("class"):
                    print("Found playerCol element")
                    name = player_stat.find_element(By.CSS_SELECTOR, "a").text if player_stat.find_elements(By.CSS_SELECTOR, "a") else player_stat.text
                    print(f"Name: {name}")
                elif "statsDetail" in player_stat.get_attribute("class"):
                    stats = player_stat.text.split()
                    if len(stats) >= 1 and re.search(r"\d+\.\d+", stats[0]):
                        kd_ratio = stats[0]
                break
            except StaleElementReferenceException as e:
                player_stats = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".playerCol, .statsDetail")))
                player_stats = driver.find_elements(By.CSS_SELECTOR, ".playerCol, .statsDetail")
                print(f"An error occurred while processing match stats: {e}")
                break

        # Extract the player stats
        if "statsDetail" in player_stat.get_attribute("class"):
            stats = player_stat.text.split()
            if len(stats) >= 1 and re.search(r"\d+\.\d+", stats[0]):
                kd_ratio = stats[0]

                # Process match stats for the player
                try:
                    time.sleep(1)
                    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".playerCol, .statsDetail")))
                    player_link = driver.find_element(By.XPATH, f"//a[contains(text(), '{name}')]")
                    print(player_link.get_attribute('outerHTML'))
                    driver.execute_script("arguments[0].click();", player_link)
                    time.sleep(1)
                    player_stats = driver.find_elements(By.CSS_SELECTOR, ".playerCol, .statsDetail")
                    player = [name, kd_ratio]

                    # Extract additional player stats
                    headshot_percentage = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Headshot %')]/following-sibling::span"))).text
                    player.append(headshot_percentage)

                    kpr = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Kills / round')]/following-sibling::span"))).text
                    player.append(kpr)

                    dpr = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'Deaths / round')]/following-sibling::span"))).text
                    player.append(dpr)

                    # Extract match stats for the player
                    matches_link = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href*='/stats/players/matches/'][data-link-tracking-destination='Click on Matches -> Individual -> Overview [subnavigation]']")))
                    driver.execute_script("arguments[0].click();", matches_link)
                    
                    match_stats = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.group-2, tr.group-1")))
                    match_scores = []
                    num_of_matches = 0
                    rounds_won = 0
                    rounds_played = 0
                    # Process match stats for the player
                    for i, match_stat in enumerate(match_stats):
                        player_name = player[0]
                        player_team = driver.find_element(By.CSS_SELECTOR, ".gtSmartphone-only span:last-of-type").text
                        try:
                            team_name = ""
                            score = ""
                            while team_name == "" or score == "":
                                try:
                                    team = match_stat.find_element(By.CSS_SELECTOR, ".gtSmartphone-only span:last-of-type").text
                                    team_name = team.strip()
                                    
                                    score_span = match_stat.find_element(By.XPATH, ".//div[contains(@class, 'gtSmartphone-only')]//*[contains(text(), '(')]")
                                    score_text = score_span.text.strip()
                                
                                    score = re.search(r'\((\d+)\)', score_text).group(1)
                                    
                                except:
                                    time.sleep(1)
                                    match_stats = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.group-2, tr.group-1")))
                                    match_stat = match_stats[i]
                            team_data = match_stat.find_elements(By.CSS_SELECTOR, ".gtSmartphone-only span")
                            print("Team data:", team_data[3].text)
                            if team_name.lower() == player_team.lower():
                                player_score = score
                                opposing_team_name = team_data[2].text.strip()
                                print(opposing_team_name)
                                opposing_team_score = team_data[3].text.strip('()')
                                print("Score strip: ", opposing_team_score)
                                rounds_won += int(player_score)
                                rounds_played += int(player_score) + int(opposing_team_score)
                            else:
                                player_score = team_data[1].text.strip('()')
                                print(player_score)
                                opposing_team_score = score
                                print(opposing_team_score)
                                opposing_team_name = team_data[0].text.strip()
                                print(opposing_team_name)
                                rounds_won += int(opposing_team_score)
                                rounds_played += int(player_score) + int(opposing_team_score)

                            match_scores.append((team_name, opposing_team_name, player_score, opposing_team_score))
                            num_of_matches += 1

                            if num_of_matches == 5: # exit loop after 5 iterations
                                break

                        except:
                            # Refresh the page if the element can't be found
                            driver.back()
                            player_stats = driver.find_elements(By.CSS_SELECTOR, ".playerCol, .statsDetail")
                            time.sleep(1)
                            match_stats = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.group-2, tr.group-1")))

                except Exception as e:
                    print(f"An error occurred while processing data for player {name}: {e}")
                    continue

                players.append([name, kd_ratio, headshot_percentage, kpr, dpr, rounds_won, rounds_played])
                print(players)
                print(f"{player_name}: {rounds_won} rounds won out of {rounds_played} rounds played in {num_of_matches} matches")
                driver.get(url)
                time.sleep(1)
    except StaleElementReferenceException as e:
    # handle the exception here
        print(f"An error occurred while processing match stats: {e}")
        break
# Close the webdriver
driver.quit()
# Store the data in a Pandas dataframe
df = pd.DataFrame(players, columns=["Name", "K/D", "HS %", "KPR", "DPR", "RW", "RP"])

# Clean the data
df["K/D"] = df["K/D"].str.extract(r"(\d+\.\d+)").astype(float)
df["HS %"] = df["HS %"].str.extract(r"(\d+\.\d+)").astype(float)
df["KPR"] = df["KPR"].str.extract(r"(\d+\.\d+)").astype(float)
df["DPR"] = df["DPR"].str.extract(r"(\d+\.\d+)").astype(float)



# Drop any rows that have missing or invalid data
df.dropna(subset=["Name", "K/D", "HS %", "KPR", "DPR"], inplace=True)


# Save the data to a CSV file
df.to_csv("player_stats.csv", index=False, sep='\t')

# Close the webdriver
driver.quit() 


from Stale Element - Selenium - Python

No comments:

Post a Comment