Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F608982
HTML5Tidy.java
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
•
tstarling
Aug 6 2015, 11:27 AM
2015-08-06 11:27:22 (UTC+0)
Size
1 KB
Referenced Files
None
Subscribers
None
HTML5Tidy.java
View Options
package
org.wikimedia.html5tidy
;
import
javax.servlet.*
;
import
java.io.IOException
;
import
java.io.ByteArrayInputStream
;
import
java.io.ByteArrayOutputStream
;
import
java.util.Enumeration
;
import
java.nio.charset.Charset
;
import
org.xml.sax.ContentHandler
;
import
org.xml.sax.InputSource
;
import
org.xml.sax.SAXException
;
import
nu.validator.encoding.Encoding
;
import
nu.validator.htmlparser.common.XmlViolationPolicy
;
import
nu.validator.htmlparser.sax.HtmlParser
;
import
nu.validator.htmlparser.sax.HtmlSerializer
;
public
class
HTML5Tidy
extends
GenericServlet
{
public
void
service
(
ServletRequest
req
,
ServletResponse
res
)
throws
ServletException
,
IOException
{
req
.
setCharacterEncoding
(
"UTF-8"
);
String
text
=
req
.
getParameter
(
"text"
);
if
(
text
==
null
)
{
throw
new
ServletException
(
"The text parameter must be given"
);
}
Charset
utf8
;
try
{
utf8
=
Charset
.
forName
(
"UTF-8"
);
}
catch
(
IllegalArgumentException
e
)
{
throw
new
ServletException
(
"No UTF-8"
,
e
);
}
byte
[]
input
=
req
.
getParameter
(
"text"
).
getBytes
(
utf8
);
ByteArrayOutputStream
sink
=
new
ByteArrayOutputStream
();
ContentHandler
serializer
=
new
HtmlSerializer
(
sink
);
HtmlParser
parser
=
new
HtmlParser
(
XmlViolationPolicy
.
ALLOW
);
parser
.
setContentHandler
(
serializer
);
try
{
parser
.
setProperty
(
"http://xml.org/sax/properties/lexical-handler"
,
serializer
);
InputSource
source
=
new
InputSource
(
new
ByteArrayInputStream
(
input
));
source
.
setEncoding
(
"UTF-8"
);
parser
.
parse
(
source
);
}
catch
(
SAXException
e
)
{
throw
new
ServletException
(
"Error parsing HTML"
,
e
);
}
// HtmlSerializer writes UTF-8 by default
res
.
setContentType
(
"text/html;charset=UTF-8"
);
res
.
getOutputStream
().
write
(
sink
.
toByteArray
());
}
};
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
584829
Default Alt Text
HTML5Tidy.java (1 KB)
Attached To
Mode
P1843 HTML5Tidy.java
Attached
Detach File
Event Timeline
Log In to Comment