`
dajiangxiaoyan
  • 浏览: 20490 次
  • 性别: Icon_minigender_2
  • 来自: 北京
社区版块
存档分类
最新评论

selenium 级联打开页面上的所有超链接

 
阅读更多

前段时间在写 web 页面,为了方便测试,用 Selenium 写了一个级联打开页面上所有超链接的小工具,代码如下:
import com.thoughtworks.selenium.DefaultSelenium;
import com.thoughtworks.selenium.Selenium;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Thread that starts a Selenium RC session, opens the configured start URL in
 * a new window and then opens every hyperlink found on that page, optionally
 * descending into the opened pages up to the configured recursion depth.
 *
 * <p>Each opened window uses its URL as the window id.
 */
public class OpenLinkTestThread extends Thread {

    private static final Logger LOG = Logger.getLogger(OpenLinkTestThread.class.getName());

    /** Timeout, in milliseconds, passed to {@code waitForPopUp} for every opened window. */
    private static final String POPUP_TIMEOUT_MS = "100000";

    /** Pause, in milliseconds, before the session is stopped after a completed run. */
    private static final long CLOSE_DELAY_MS = 10000L;

    private static final String JAVASCRIPT_SCHEME = "javascript:";

    /**
     * Matches the opening tag of an anchor and captures its double-quoted
     * {@code href} value. The attribute scan never crosses a {@code >}, so an
     * {@code href} belonging to a later tag is not attributed to this anchor,
     * and tag/attribute names are matched case-insensitively.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "<a\\s+(?:[^>]*?\\s)?href\\s*=\\s*\"([^\"]+)\"[^>]*>",
            Pattern.CASE_INSENSITIVE);

    private final EnviromentPara ep;

    /**
     * @param ep run configuration (browser, start URL, recursion settings,
     *           close-on-finish flag)
     */
    public OpenLinkTestThread(EnviromentPara ep) {
        this.ep = ep;
    }

    @Override
    public void run() {
        openLinkTest();
    }

    /**
     * Runs the whole link-opening session. Failures are logged rather than
     * propagated. When {@code ep.isCloseOnFinish()} is true the Selenium
     * session is stopped even if the run failed part-way, so no browser is
     * left behind.
     */
    public void openLinkTest() {
        Selenium selenium = null;
        try {
            // Constant.port is the Selenium server port (4444 is the server default).
            // ep.getBrowser() is e.g. "*firefox", "*googlechrome" or "*iexplore"
            // (IE support is not very good).
            selenium = new DefaultSelenium("localhost", Constant.port, ep.getBrowser(), ep.getUrl());
            selenium.start();
            // Open the start page in a new window whose id is the URL itself.
            selenium.openWindow(ep.getUrl(), ep.getUrl());
            selenium.waitForPopUp(ep.getUrl(), POPUP_TIMEOUT_MS);
            // ep.isIsRecursion() decides whether links on opened pages are followed too.
            openLinkForOnePage(selenium, ep.getBrowser(), ep.getUrl(), ep.isIsRecursion(), 1);

            if (ep.isCloseOnFinish()) {
                // Leave the opened windows visible for a moment before closing.
                Thread.sleep(CLOSE_DELAY_MS);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up can observe it.
            Thread.currentThread().interrupt();
            LOG.log(Level.WARNING, "Link test interrupted", e);
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Link test failed", e);
        } finally {
            if (selenium != null && ep.isCloseOnFinish()) {
                try {
                    selenium.stop();
                } catch (RuntimeException e) {
                    LOG.log(Level.WARNING, "Failed to stop Selenium session", e);
                }
            }
        }
    }

    /**
     * Opens every distinct link found on the page shown in window {@code url}.
     *
     * @param selenium      running Selenium session
     * @param browser       browser launcher string; only forwarded to recursive calls
     * @param url           id of the window whose page is scanned
     * @param recursion     whether links on the opened pages are followed as well
     * @param recursionDeep depth of the current page; pages are expanded only
     *                      while this is below {@code ep.getRecursionDeep()}
     */
    public void openLinkForOnePage(Selenium selenium, String browser,
            String url, boolean recursion, int recursionDeep) {
        selenium.selectWindow(url);
        List<String> links = getAllLinkForOnePage(selenium.getHtmlSource());
        if (links == null) {
            return;
        }

        // The URL doubles as the window id, so opening a repeated link would
        // only reload the same window and repeat the whole recursion below it.
        Set<String> uniqueLinks = new LinkedHashSet<String>(links);
        boolean descend = recursion && recursionDeep < ep.getRecursionDeep();

        for (String link : uniqueLinks) {
            selenium.openWindow(link, link);
            selenium.waitForPopUp(link, POPUP_TIMEOUT_MS);
            if (descend) {
                openLinkForOnePage(selenium, browser, link, recursion, recursionDeep + 1);
            }
        }
    }

    /**
     * Extracts the targets of all hyperlinks in the given HTML.
     *
     * <p>Blank targets, in-page fragments ({@code #...}) and
     * {@code javascript:} pseudo-links are skipped; {@code &amp;} in a target
     * is decoded to {@code &}.
     *
     * @param htmlSource HTML text to scan; may be {@code null}
     * @return link targets in document order, duplicates included; never {@code null}
     */
    public List<String> getAllLinkForOnePage(String htmlSource) {
        List<String> links = new ArrayList<String>();
        if (htmlSource == null) {
            return links;
        }

        Matcher matcher = LINK_PATTERN.matcher(htmlSource);
        while (matcher.find()) {
            String href = matcher.group(1).trim();
            if (href.length() == 0 || href.startsWith("#") || isJavascriptLink(href)) {
                continue;
            }
            links.add(href.replace("&amp;", "&"));
        }
        return links;
    }

    /** Locale-independent, case-insensitive check for the {@code javascript:} scheme. */
    private static boolean isJavascriptLink(String href) {
        return href.regionMatches(true, 0, JAVASCRIPT_SCHEME, 0, JAVASCRIPT_SCHEME.length());
    }
}
 
0
3
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics