
我通过查找所有不处于“加载中”(loading)状态的内容,解决了需要等待多个元素的问题。您也可以使用 `:nth-child` 选择器,如下所示:

# in for loop with enumerate for i
selector.append(' > li:nth-child(%i)' % (i + 1))  # identify child <li> by its order pos

# This is my heavily commented solution, for example:
def parse_crippled_shifted_list(driver, frame, selector, level=1, parent_id=0, path=None):
    """Walk a lazily loaded, nested HTML list and process every item in it.

    Traversal of an HTML list of special structure: you can't know whether
    an element has a sub-list unless you enter (double-click) it.
    Supports starting from a remembered list element.

    Nested lists have classes "closed" / "last closed" when closed and
    "open" / "last open" when opened (on the <li>). Plain elements have
    classes "leaf" / "last leaf" in both cases. A nested list sits inside
    its <li> element as a <ul>, which appears after clicking the <a> in
    that <li>.

    Args:
        driver: WebDriver instance.
        frame: frame of the list (not used by this function itself; it is
            only forwarded to the recursive calls).
        selector: list of CSS selector parts that, joined, address the
            current <ul>. It is mutated in place during traversal.
        level: depth level, used for console output formatting and passed
            on to process_category_case / process_page_case.
        parent_id: id of the parent category (in the DB).
        path: remaining path of categories (objects with ``name`` and
            ``id``) down to the category to resume from. Consumed in place
            with ``pop(0)``; falsy means "parse everything".

    Raises:
        Exception: when an opened <li> has a class that is neither a
            branch ("open"/"last open") nor a leaf ("leaf"/"last leaf").
    """
    indent = "\t" * level

    # Add current level list elements.
    # This selects all links but the loading ones - just what must be excluded.
    selector.append(' > li > a:not([class=loading])')
    loaded_css = ''.join(selector)

    # Wait for child list to load
    if not _wait_until_present(driver, loaded_css):
        print("%s timed out" % loaded_css)
    else:
        # List is loaded
        del selector[-1]  # selector correction: drop the part aimed at loaded content
        selector.append(' > li')
        children = driver.find_elements_by_css_selector(''.join(selector))  # fetch list elements

        # Walk the whole list; only the 1-based position of each child is needed.
        for position, _ in enumerate(children, start=1):
            del selector[-1]  # delete the previous iteration's trailing part
            if selector[-1] != ' > ul' and selector[-1] != 'ul.ltr':
                del selector[-1]
            selector.append(' > li:nth-child(%i)' % position)  # identify child <li> by its order pos
            selector.append(' > a')  # add 'li > a' reference to click
            child_link = driver.find_element_by_css_selector(''.join(selector))

            if path:
                # It's required to continue from a specified category.
                if child_link.text != path[0].name:
                    print("dummy")
                    continue
                # Open required category
                if not _open_item(driver, child_link):
                    continue
                # This element of the list must always be a category (have a nested list).
                del selector[-1]  # delete changed and already useless link reference
                # If <li> is a category, it has a <ul> child now and class="open".
                # Checking the class has priority, because the <li> exists for sure.
                current_li = driver.find_element_by_css_selector(''.join(selector))
                if current_li.get_attribute('class') not in ('open', 'last open'):
                    # Page case - LEAF (or anything else): nothing to do here.
                    continue
                # Category case - BRANCH
                selector.append(' > ul')  # forward to nested list
                nested_css = ''.join(selector)
                # Wait for nested list to load
                if not _wait_until_present(driver, nested_css):
                    print("%s%s timed out (%i secs). Failed to load nested list."
                          % (indent, nested_css, WAIT_LONG_TIME))
                    continue
                # Process this nested list
                last = path.pop(0)
                if path:  # If more to parse
                    print("%sGoing deeper to: %s" % (indent, nested_css))
                    parse_crippled_shifted_list(driver, frame, selector, level + 1,
                                                parent_id=last.id, path=path)
                else:  # Current is required
                    # Two spaces: matches the output of the original Python 2 print statement.
                    print("%sReturning target category:  %s" % (indent, nested_css))
                    parse_crippled_shifted_list(driver, frame, selector, level + 1, last.id, path=None)
                continue

            # We parse freely further (no need to start from a remembered position).
            # Open child
            if not _open_item(driver, child_link):
                continue
            # Determine its type
            del selector[-1]  # delete changed and already useless link reference
            # If <li> is a category, it has a <ul> child now and class="open".
            # Checking the class has priority, because the <li> exists for sure.
            current_li = driver.find_element_by_css_selector(''.join(selector))
            li_class = current_li.get_attribute('class')

            if li_class in ('open', 'last open'):
                # Category case - BRANCH
                new_parent_id = process_category_case(child_link, parent_id, level)  # add category to DB
                selector.append(' > ul')  # forward to nested list
                nested_css = ''.join(selector)
                # Wait for nested list to load, then parse it
                if _wait_until_present(driver, nested_css):
                    parse_crippled_shifted_list(driver, frame, selector, level + 1, new_parent_id)
                else:
                    print("%s%s timed out (%i secs). Failed to load nested list."
                          % (indent, nested_css, WAIT_LONG_TIME))
            elif li_class in ('leaf', 'last leaf'):
                # Page case - LEAF
                process_page_case(driver, child_link, level)
            else:
                raise Exception('Damn! Alien class: %s' % li_class)

    # Drop the last child part and the ' > ul' the caller appended before recursing.
    # NOTE(review): indentation was lost in the source; this is placed at function
    # level so the selector is also restored after a timeout - confirm.
    del selector[-2:]


def _wait_until_present(driver, css_selector):
    """Wait up to WAIT_LONG_TIME for css_selector to match; return False on timeout."""
    try:
        WebDriverWait(driver, WAIT_LONG_TIME).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, css_selector)))
    except TimeoutException:
        return False
    return True


def _open_item(driver, link):
    """Double-click link; on InvalidElementStateException print it and return False."""
    try:
        double_click(driver, link)
    except InvalidElementStateException as error:
        # Read the message from the caught instance, not from the exception class.
        print("\n\nERROR\n%s \n\n" % error.msg)
        return False
    return True

