View Javadoc

1   /*
2    * Copyright 2012-2013 smartics, Kronseder & Reiner GmbH
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package de.smartics.properties.utils;
17  
18  import java.io.ByteArrayOutputStream;
19  import java.io.IOException;
20  
21  import org.apache.commons.lang.StringUtils;
22  import org.htmlcleaner.CleanerProperties;
23  import org.htmlcleaner.DefaultTagProvider;
24  import org.htmlcleaner.HtmlCleaner;
25  import org.htmlcleaner.ITagInfoProvider;
26  import org.htmlcleaner.JDomSerializer;
27  import org.htmlcleaner.SimpleXmlSerializer;
28  import org.htmlcleaner.TagNode;
29  import org.jdom.Document;
30  
31  import de.smartics.util.lang.Arg;
32  
33  /**
34   * Utilities to deal with HTML.
35   */
36  public final class HtmlUtils
37  {
38    // ********************************* Fields *********************************
39  
40    // --- constants ------------------------------------------------------------
41  
42    // --- members --------------------------------------------------------------
43  
44    /**
45     * The encoding used write.
46     */
47    private final String outputEncoding;
48  
49    /**
50     * The cleaner instance to use.
51     */
52    private final HtmlCleaner cleaner;
53  
54    /**
55     * Javadoc helper.
56     */
57    private final JavadocCommentHelper helper = JavadocCommentHelper.createHtml();
58  
59    // ****************************** Initializer *******************************
60  
61    // ****************************** Constructors ******************************
62  
63    /**
64     * Default constructor.
65     *
66     * @param outputEncoding the encoding used write.
67     */
68    public HtmlUtils(final String outputEncoding)
69    {
70      this.outputEncoding = Arg.checkNotBlank("outputEncoding", outputEncoding);
71  
72      cleaner = new HtmlCleaner();
73    }
74  
75    // ****************************** Inner Classes *****************************
76  
77    // ********************************* Methods ********************************
78  
79    // --- init -----------------------------------------------------------------
80  
81    // --- get&set --------------------------------------------------------------
82  
83    /**
84     * Returns the encoding used write.
85     *
86     * @return the encoding used write.
87     */
88    public String getOutputEncoding()
89    {
90      return outputEncoding;
91    }
92  
93    // --- business -------------------------------------------------------------
94  
95    /**
96     * Cleans up the HTML fragment and thereby also cleans Javadoc fragments.
97     *
98     * @param htmlFragment the dirty fragment.
99     * @return the cleaned up fragment.
100    */
101   public String cleanHtmlAndJavadoc(final String htmlFragment)
102   {
103     final String javadocCleaned =
104         helper.expandFirstBlock(helper.replaceJavadocInlines(htmlFragment));
105     final String htmlCleaned = clean(javadocCleaned);
106     return htmlCleaned;
107   }
108 
109   /**
110    * Cleans up the HTML fragment.
111    *
112    * @param htmlFragment the dirty fragment.
113    * @return the cleaned up fragment.
114    */
115   public String clean(final String htmlFragment)
116   {
117     if (StringUtils.isBlank(htmlFragment))
118     {
119       return htmlFragment;
120     }
121     final TagNode tagNode = cleaner.clean(htmlFragment);
122     final TagNode body = tagNode.findElementByName("body", false);
123 
124     final ByteArrayOutputStream out = new ByteArrayOutputStream(1024);
125     final CleanerProperties cleanerProps = createCleanerProperties();
126     try
127     {
128       final SimpleXmlSerializer serializer =
129           new SimpleXmlSerializer(cleanerProps);
130       serializer.writeToStream(body, out, outputEncoding, true);
131       final String content = out.toString(outputEncoding);
132       return content;
133     }
134     catch (final IOException e)
135     {
136       throw new IllegalStateException(
137           "Streaming error with in memory stream or encoding.", e);
138     }
139   }
140 
141   /**
142    * Cleans up the HTML fragment.
143    *
144    * @param htmlFragment the dirty fragment.
145    * @return the cleaned up fragment.
146    */
147   public Document cleanJDom(final String htmlFragment)
148   {
149     if (StringUtils.isBlank(htmlFragment))
150     {
151       return null;
152     }
153 
154     final TagNode tagNode = cleaner.clean(htmlFragment);
155     final TagNode body = tagNode.findElementByName("body", false);
156     final CleanerProperties cleanerProps = createCleanerProperties();
157     final Document document =
158         new JDomSerializer(cleanerProps, true).createJDom(body);
159     return document;
160   }
161 
162   private static CleanerProperties createCleanerProperties()
163   {
164     final CleanerProperties properties = new CleanerProperties()
165     {
166 
167       // CHECKSTYLE:OFF analogous to the implementation that gets fixed here.
168       ITagInfoProvider tagInfoProvider = new DefaultTagProvider(); // NOPMD
169 
170       // CHECKSTYLE:ON
171 
172       @Override
173       public ITagInfoProvider getTagInfoProvider()
174       {
175         return tagInfoProvider;
176       }
177     };
178 
179     properties.setOmitHtmlEnvelope(true);
180     properties.setOmitDoctypeDeclaration(true);
181     properties.setOmitXmlDeclaration(true);
182     properties.setUseEmptyElementTags(true);
183 
184     return properties;
185   }
186 
187   // --- object basics --------------------------------------------------------
188 
189 }