16 Oct, 2018
Categories: Advanced Java
HTML File
Create a new HTML file named index.html as below:
<!DOCTYPE html>
<html>
<head>
<!-- Sample page used as the input of the regular-expression extraction demo. -->
<!-- NOTE(review): the charset declared below is ISO-8859-1, while Main.java decodes this file as UTF-8. The two agree for the ASCII-only content here; confirm before adding non-ASCII text. -->
<meta charset="ISO-8859-1">
<title>Extract HTML Tags with Regular Expression</title>
<!-- Stylesheet references. -->
<link href="css/a.css" rel="stylesheet" type="text/css">
<link href="css/b.css" rel="stylesheet" type="text/css">
<!-- External script references. -->
<script type="text/javascript" src="js/c.js"></script>
<script type="text/javascript" src="js/d.js"></script>
<script type="text/javascript" src="js/e.js"></script>
</head>
<body>
<p>Paragraph 1</p>
<p>Paragraph 2</p>
<!-- Images. -->
<img src="images/a.gif" width="120" height="100">
<img src="images/b.gif" width="120" height="100">
<!-- Hyperlinks. -->
<a href="abc.com">Link 1</a>
<a href="def.net">Link 2</a>
</body>
</html>
Run Application
Create a new Java file named Main.java. This file uses regular expressions to extract HTML tags, as shown below:
package regular_expression;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that extracts the title, stylesheet links, external scripts, hyperlinks
 * and images from a sample HTML page using regular expressions, printing every
 * match to standard output.
 *
 * <p>Regular expressions are adequate for small, well-formed samples such as the
 * page used here; they are not a general-purpose HTML parser.
 */
public class Main {

    /** Captures the text between the opening and closing {@code title} tags. */
    private static final Pattern TITLE_PATTERN =
            Pattern.compile("<title>(.*?)</title>");

    /**
     * Matches a {@code link} tag carrying {@code rel="stylesheet"}. {@code [^>]*}
     * keeps the match inside one tag and lets {@code rel} be the last attribute.
     */
    private static final Pattern STYLESHEET_PATTERN =
            Pattern.compile("<link[^>]*\\srel=\"stylesheet\"[^>]*>");

    /**
     * Matches an empty {@code script} element carrying {@code type="text/javascript"},
     * whether {@code type} appears before or after the other attributes.
     */
    private static final Pattern SCRIPT_PATTERN =
            Pattern.compile("<script[^>]*\\stype=\"text/javascript\"[^>]*></script>");

    /**
     * Matches an anchor whose first attribute is {@code href}. The reluctant
     * {@code (.*?)} stops at the first closing tag, so several anchors on one
     * line are reported separately instead of being merged into one match.
     */
    private static final Pattern ANCHOR_PATTERN =
            Pattern.compile("<a\\s+href=\"([^\"]*)\"[^>]*>(.*?)</a>");

    /**
     * Matches an {@code img} tag whose first attribute is {@code src}; any
     * further attributes, quoted or not, are accepted up to the closing {@code >}.
     */
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("<img\\s+src=\"([^\"]*)\"[^>]*>");

    /**
     * Prints the requested capture group of every match of {@code pattern} in
     * {@code html}, each optionally followed by a separator line.
     *
     * @param pattern   compiled expression to search with
     * @param html      text to search
     * @param group     capture group to print ({@code 0} for the whole match)
     * @param separator line printed after each match, or {@code null} for none
     */
    private static void printMatches(Pattern pattern, String html, int group, String separator) {
        Matcher matcher = pattern.matcher(html);
        while (matcher.find()) {
            System.out.println(matcher.group(group));
            if (separator != null) {
                System.out.println(separator);
            }
        }
    }

    /** Prints the text of every {@code title} element found in {@code html}. */
    private static void extractWebPageTitle(String html) {
        printMatches(TITLE_PATTERN, html, 1, null);
    }

    /** Prints every stylesheet {@code link} tag found in {@code html}. */
    private static void extractCss(String html) {
        printMatches(STYLESHEET_PATTERN, html, 0, "========================================");
    }

    /** Prints every JavaScript {@code script} element found in {@code html}. */
    private static void extractJavascripts(String html) {
        printMatches(SCRIPT_PATTERN, html, 0, "========================================");
    }

    /** Prints every hyperlink (anchor element) found in {@code html}. */
    private static void extractLinks(String html) {
        printMatches(ANCHOR_PATTERN, html, 0, "==========================");
    }

    /** Prints every {@code img} tag found in {@code html}. */
    private static void extractImages(String html) {
        printMatches(IMAGE_PATTERN, html, 0, "=========================================");
    }

    /**
     * Reads {@code src/regular_expression/index.html} relative to the working
     * directory and prints each category of extracted tags under its own heading.
     * Failures are reported on standard error rather than propagated.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // Build the path from its components so it resolves on every
            // platform, not only where '\' is the separator.
            String html = new String(
                    Files.readAllBytes(Paths.get("src", "regular_expression", "index.html")),
                    StandardCharsets.UTF_8);
            System.out.println("Extract Web Page Title");
            extractWebPageTitle(html);
            System.out.println("\nExtract CSS Links");
            extractCss(html);
            System.out.println("\nExtract JavaScript Links");
            extractJavascripts(html);
            System.out.println("\nExtract HTML Links");
            extractLinks(html);
            System.out.println("\nExtract Images");
            extractImages(html);
        } catch (Exception e) {
            // Print the exception itself: getMessage() alone may be null, or
            // just a bare file name with no hint of what went wrong.
            System.err.println("Could not extract HTML tags: " + e);
        }
    }
}
Output
Extract Web Page Title
Extract HTML Tags with Regular Expression
Extract CSS Links
<link href="css/a.css" rel="stylesheet" type="text/css">
========================================
<link href="css/b.css" rel="stylesheet" type="text/css">
========================================
Extract JavaScript Links
<script type="text/javascript" src="js/c.js"></script>
========================================
<script type="text/javascript" src="js/d.js"></script>
========================================
<script type="text/javascript" src="js/e.js"></script>
========================================
Extract HTML Links
<a href="abc.com">Link 1</a>
==========================
<a href="def.net">Link 2</a>
==========================
Extract Images
<img src="images/a.gif" width="120" height="100">
=========================================
<img src="images/b.gif" width="120" height="100">
=========================================
Tags
annotations, annotations in Java Reflection, Extract HTML Tags with Regular Expression, java, java reflection, Java Reflection for Annotations, java regular expression, reflection, Reflection in Java, regular expression, Validate American Phone Number with Regular Expression in Java, Validate Complex Password with Regular Expression, Validate Credit Card Number with Regular Expression, Validate IP Addresses with Regular Expression, Validate URL Address with Regular Expression