Check Broken Links With Selenium Webdriver

A broken link is a URL that is unreachable because the server responds with an error. When you request a URL, the server returns a three-digit numeric status code that represents the status of the link.

Steps To Check Broken Link In Web Page

Please see the Java code comments for a detailed explanation.

  1. Use Selenium WebDriver to parse out all same-domain URL links in the current web page, including both `a` tag href URLs and `img` tag src URLs.
    	/*
    	 * Return all valid url links that belong to the same domain in current web page.
    	 * The href of every a tag is collected first, then the src of every img tag.
    	 * String webPageUrl: The web page that need to parse out links.
    	 * Returns an empty list when webPageUrl is null or blank.
    	 * */
    	private List<String> parseOutAllUrlLinksInWebPage(String webPageUrl)
    	{
    	    List<String> retList = new ArrayList<String>();

    	    if(webPageUrl!=null && !"".equals(webPageUrl.trim()))
    	    {
    	        /* Get current page belongs domain. */
    	        String urlDomain = this.getPageBelongDomain(webPageUrl);

    	        WebDriver ffDriver = new FirefoxDriver();
    	        try
    	        {
    	            /* Maximize the Firefox browser window. */
    	            ffDriver.manage().window().maximize();

    	            /* Get the web page.*/
    	            ffDriver.get(webPageUrl);

    	            /* First parse out all a tag href urls. */
    	            this.collectSameDomainUrls(ffDriver, "a", "href", urlDomain, retList);

    	            /* Second parse out all img tag src urls. */
    	            this.collectSameDomainUrls(ffDriver, "img", "src", urlDomain, retList);
    	        }finally
    	        {
    	            /* Always close the browser, even when loading or parsing the page fails. */
    	            ffDriver.quit();
    	        }
    	    }

    	    System.out.println("Parse out url completed successfully.");
    	    return retList;
    	}

    	/*
    	 * Append to retList the value of attrName of every tagName element in the
    	 * page currently loaded in driver, keeping only same domain urls.
    	 * */
    	private void collectSameDomainUrls(WebDriver driver, String tagName, String attrName, String urlDomain, List<String> retList)
    	{
    	    List<WebElement> elementList = driver.findElements(By.tagName(tagName));
    	    if(elementList==null)
    	    {
    	        return;
    	    }

    	    for(WebElement element : elementList)
    	    {
    	        String url = element.getAttribute(attrName);
    	        if(url!=null && !"".equals(url.trim()) && this.isSameDomainUrl(url, urlDomain))
    	        {
    	            retList.add(url);
    	        }
    	    }
    	}

    	/*
    	 * Return true when url is an http or https url whose host part is exactly urlDomain.
    	 * The character right after the domain must end the host part, otherwise
    	 * "https://www.w3.org.evil.com" would be accepted as belonging to "www.w3.org".
    	 * */
    	private boolean isSameDomainUrl(String url, String urlDomain)
    	{
    	    String lowerUrl = url.toLowerCase();
    	    String[] prefixArr = {"http://" + urlDomain, "https://" + urlDomain};

    	    for(String prefix : prefixArr)
    	    {
    	        if(lowerUrl.startsWith(prefix))
    	        {
    	            if(lowerUrl.length()==prefix.length())
    	            {
    	                return true;
    	            }
    	            char next = lowerUrl.charAt(prefix.length());
    	            if(next=='/' || next=='?' || next=='#' || next==':')
    	            {
    	                return true;
    	            }
    	        }
    	    }
    	    return false;
    	}

    	/*
    	 * Return domain value in the pageUrl.
    	 * The domain is the lower case text between the optional http:// or https://
    	 * prefix and the first '/', '?' or '#'. When none of them is present the rest
    	 * of the url is the domain, so "https://www.w3.org" returns "www.w3.org".
    	 * Returns an empty string when pageUrl is null or blank.
    	 * */
    	private String getPageBelongDomain(String pageUrl)
    	{
    	    String ret = "";

    	    if(pageUrl!=null && !"".equals(pageUrl.trim()))
    	    {
    	        pageUrl = pageUrl.trim().toLowerCase();

    	        int startIdx = 0;
    	        if(pageUrl.startsWith("http://"))
    	        {
    	            startIdx = "http://".length();
    	        }else if(pageUrl.startsWith("https://"))
    	        {
    	            startIdx = "https://".length();
    	        }

    	        /* Default to the end of the url: indexOf returns -1 when there is no path, which substring rejects. */
    	        int endIdx = pageUrl.length();
    	        for(int i=startIdx;i<pageUrl.length();i++)
    	        {
    	            char c = pageUrl.charAt(i);
    	            if(c=='/' || c=='?' || c=='#')
    	            {
    	                endIdx = i;
    	                break;
    	            }
    	        }

    	        ret = pageUrl.substring(startIdx, endIdx);
    	    }

    	    System.out.println("The webpage: " + pageUrl + " , belonged domain : " + ret);
    	    return ret;
    	}
  2. Use HttpURLConnection to get the HTTP response status code of each parsed-out URL. If the code is 4XX or 5XX, then it is a broken link.
    	/*
    	 * Get http response status code for a web page connection.
    	 * A HEAD request is sent so that only the status line and headers are fetched.
    	 * String webPageUrl: The url to check.
    	 * Returns the http status code, or -1 when webPageUrl is null or blank or
    	 * when the request fails (malformed url, non http url, connection error, timeout).
    	 * */
    	private int getHttpResponseStatusCode(String webPageUrl)
    	{
    	    int ret = -1;
    	    HttpURLConnection httpConn = null;

    	    try
    	    {
    	        if(webPageUrl!=null && !"".equals(webPageUrl.trim()))
    	        {
    	            URL urlObj = new URL(webPageUrl.trim());
    	            httpConn = (HttpURLConnection)urlObj.openConnection();
    	            httpConn.setRequestMethod("HEAD");
    	            /* Without timeouts a single unresponsive server would block the whole check forever. */
    	            httpConn.setConnectTimeout(10000);
    	            httpConn.setReadTimeout(10000);
    	            httpConn.connect();
    	            ret = httpConn.getResponseCode();
    	        }
    	    }catch(Exception ex)
    	    {
    	        /* Best effort: any failure (including the cast failing for a non http url) is reported as -1. */
    	        ex.printStackTrace();
    	    }finally
    	    {
    	        if(httpConn!=null)
    	        {
    	            httpConn.disconnect();
    	        }
    	    }

    	    /* Return after the finally block: a return inside finally would silently discard any Error thrown above. */
    	    System.out.println("Http Status Code : " + ret + " , Url : " + webPageUrl);
    	    return ret;
    	}
  3. Java code in main method.
    	/*
    	 * Parse out all same domain links of https://www.w3.org/ , request each one
    	 * and print the links that are broken.
    	 * A link is treated as broken when its status code is 4XX or 5XX, or when
    	 * no status code could be obtained at all (negative value).
    	 * */
    	public static void main(String args[])
    	{
    	    TestBrokenLinksInWebPage testBrokenLink = new TestBrokenLinksInWebPage();

    	    /* Parse out all same domain page links.*/
    	    List<String> pageLinkList = testBrokenLink.parseOutAllUrlLinksInWebPage("https://www.w3.org/");

    	    /* Loop in the list to check each link http response status code. */
    	    int brokenCount = 0;
    	    for(String webPageUrl : pageLinkList)
    	    {
    	        int httpStatusCode = testBrokenLink.getHttpResponseStatusCode(webPageUrl);

    	        if(httpStatusCode<0 || httpStatusCode>=400)
    	        {
    	            brokenCount++;
    	            System.out.println("Broken link : " + webPageUrl + " , Http Status Code : " + httpStatusCode);
    	        }
    	    }

    	    System.out.println("Checked " + pageLinkList.size() + " links, " + brokenCount + " broken.");
    	}

Output

Check Broken Links With Selenium WebDriver

Download “Test-Broken-Links-In-Web-Page-Code-Example.zip” Test-Broken-Links-In-Web-Page-Code-Example.zip – Downloaded 161 times – 1 KB

  • 36