#!/usr/bin/python3
#import myurllib2 as urllib2
"""List the people attached to a lab unit in the CNRS web directory.

The directory is queried page by page over HTTPS and every result row is
turned into one string made of the text of its first two table cells.
"""
import ssl
import urllib.request

from lxml.html import document_fromstring

# Query URL template; the two ``%s`` slots receive the unit code and the
# ``p_i`` paging value (see fetch_utilisateurs).
DIRECTORY_URL = (
    "https://web-ast.dsi.cnrs.fr/l3c/owa/personnel.liste"
    "?nom=&prenom=&code_labo=%s&p_dep=&sec=&bap=&emptype=&nat_pers="
    "&typ_pers=&organisme=&delegation=&ville=&p_i=%s&p_nb_res=0"
)

# Number of attempts fetch_url makes before giving up.
MAX_TRIES = 3


def _build_https_handler():
    """Return the HTTPS handler installed as the process-wide default.

    SSLv3 is used when the local ``ssl`` module still exposes it, as the
    script has always done.  ``ssl.PROTOCOL_SSLv3`` is missing on builds whose
    OpenSSL lacks SSLv3, in which case the script used to die at import time
    with AttributeError; a default (certificate-verifying) context is used
    there instead.
    """
    # NOTE(review): SSLv3 is obsolete and insecure; presumably it was pinned
    # because the server required it at the time -- confirm it is still needed.
    protocol = getattr(ssl, "PROTOCOL_SSLv3", None)
    if protocol is None:
        context = ssl.create_default_context()
    else:
        context = ssl.SSLContext(protocol)
    return urllib.request.HTTPSHandler(context=context)


https_sslv3_handler = _build_https_handler()
opener = urllib.request.build_opener(https_sslv3_handler)
urllib.request.install_opener(opener)


def fetch_url(url):
    """Open *url* and return the response object, trying up to three times.

    HTTP and connection errors are printed and the request is retried.  Any
    other IOError propagates immediately.

    Raises:
        IOError: the error from the last attempt when every attempt failed
            (previously this path ended in an UnboundLocalError).
    """
    last_error = None
    for _ in range(MAX_TRIES):
        try:
            return urllib.request.urlopen(url)
        except IOError as e:
            if hasattr(e, 'code'):  # HTTPError
                print('http error code: ', e.code)
            elif hasattr(e, 'reason'):  # URLError
                print("can't connect, reason: ", e.reason)
            else:
                raise
            last_error = e
    # Every attempt failed with a reported error: surface the last one.
    raise last_error


def fetch_utilisateurs(unite):
    """Return the directory entries listed for the unit code *unite*.

    Each entry is ``"<first cell> <second cell>"`` taken from the rows of the
    third table of the result page.  Pages are requested until the
    "next 10 people" button no longer appears.
    """
    liste = []
    page = 0
    while True:
        # Close each response as soon as it is read so connections do not
        # pile up over a long listing.
        with fetch_url(DIRECTORY_URL % (unite, page)) as response:
            raw_html = response.read()
        content = document_fromstring(raw_html)

        for row in content.xpath('/html/body/table[3]/tr'):
            cells = list(row)
            # Skip header rows (non-<td> cells) and rows too short to hold
            # both cells, which would otherwise raise IndexError.
            if len(cells) >= 2 and cells[0].tag == "td":
                liste.append(
                    "%s %s" % (cells[0].text_content(), cells[1].text_content())
                )

        if not content.xpath("//input[@alt='les 10 personnes suivantes']"):
            # No "next page" button: this was the last page.
            break
        # There is a next page: advance the paging value by one page of 10.
        page += 10
    return liste


if __name__ == "__main__":
    unite = "UMR7640"
    liste_utilisateurs = fetch_utilisateurs(unite)
    for utilisateur in liste_utilisateurs:
        print(utilisateur)