1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package de.smartics.properties.utils;
17
18 import java.io.ByteArrayOutputStream;
19 import java.io.IOException;
20
21 import org.apache.commons.lang.StringUtils;
22 import org.htmlcleaner.CleanerProperties;
23 import org.htmlcleaner.DefaultTagProvider;
24 import org.htmlcleaner.HtmlCleaner;
25 import org.htmlcleaner.ITagInfoProvider;
26 import org.htmlcleaner.JDomSerializer;
27 import org.htmlcleaner.SimpleXmlSerializer;
28 import org.htmlcleaner.TagNode;
29 import org.jdom.Document;
30
31 import de.smartics.util.lang.Arg;
32
33
34
35
36 public final class HtmlUtils
37 {
38
39
40
41
42
43
44
45
46
47 private final String outputEncoding;
48
49
50
51
52 private final HtmlCleaner cleaner;
53
54
55
56
57 private final JavadocCommentHelper helper = JavadocCommentHelper.createHtml();
58
59
60
61
62
63
64
65
66
67
68 public HtmlUtils(final String outputEncoding)
69 {
70 this.outputEncoding = Arg.checkNotBlank("outputEncoding", outputEncoding);
71
72 cleaner = new HtmlCleaner();
73 }
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 public String getOutputEncoding()
89 {
90 return outputEncoding;
91 }
92
93
94
95
96
97
98
99
100
101 public String cleanHtmlAndJavadoc(final String htmlFragment)
102 {
103 final String javadocCleaned =
104 helper.expandFirstBlock(helper.replaceJavadocInlines(htmlFragment));
105 final String htmlCleaned = clean(javadocCleaned);
106 return htmlCleaned;
107 }
108
109
110
111
112
113
114
115 public String clean(final String htmlFragment)
116 {
117 if (StringUtils.isBlank(htmlFragment))
118 {
119 return htmlFragment;
120 }
121 final TagNode tagNode = cleaner.clean(htmlFragment);
122 final TagNode body = tagNode.findElementByName("body", false);
123
124 final ByteArrayOutputStream out = new ByteArrayOutputStream(1024);
125 final CleanerProperties cleanerProps = createCleanerProperties();
126 try
127 {
128 final SimpleXmlSerializer serializer =
129 new SimpleXmlSerializer(cleanerProps);
130 serializer.writeToStream(body, out, outputEncoding, true);
131 final String content = out.toString(outputEncoding);
132 return content;
133 }
134 catch (final IOException e)
135 {
136 throw new IllegalStateException(
137 "Streaming error with in memory stream or encoding.", e);
138 }
139 }
140
141
142
143
144
145
146
147 public Document cleanJDom(final String htmlFragment)
148 {
149 if (StringUtils.isBlank(htmlFragment))
150 {
151 return null;
152 }
153
154 final TagNode tagNode = cleaner.clean(htmlFragment);
155 final TagNode body = tagNode.findElementByName("body", false);
156 final CleanerProperties cleanerProps = createCleanerProperties();
157 final Document document =
158 new JDomSerializer(cleanerProps, true).createJDom(body);
159 return document;
160 }
161
162 private static CleanerProperties createCleanerProperties()
163 {
164 final CleanerProperties properties = new CleanerProperties()
165 {
166
167
168 ITagInfoProvider tagInfoProvider = new DefaultTagProvider();
169
170
171
172 @Override
173 public ITagInfoProvider getTagInfoProvider()
174 {
175 return tagInfoProvider;
176 }
177 };
178
179 properties.setOmitHtmlEnvelope(true);
180 properties.setOmitDoctypeDeclaration(true);
181 properties.setOmitXmlDeclaration(true);
182 properties.setUseEmptyElementTags(true);
183
184 return properties;
185 }
186
187
188
189 }