Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F28388046
bm.php
ssastry (Subbu)
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Authored By
ssastry
Mar 14 2019, 3:30 PM
2019-03-14 15:30:34 (UTC+0)
Size
8 KB
Referenced Files
None
Subscribers
None
bm.php
View Options
#!/usr/bin/env hhvm
<?php

// Command-line use only: under any other SAPI, stop immediately and
// without producing output.
if ( 'cli' !== PHP_SAPI ) {
	exit();
}

// Load the Composer autoloader from the vendor directory that sits one
// level above this script's directory.
require __DIR__ . '/../vendor/autoload.php';
use
RemexHtml\DOM
;
use
RemexHtml\Tokenizer
;
use
RemexHtml\TreeBuilder
;
use
RemexHtml\Serializer
;
/**
 * A Tokenizer\TokenHandler implementation in which every callback has an
 * empty body, so anything delivered to it is simply dropped.
 *
 * It is not instantiated in the visible part of this script.
 */
class NullHandler implements Tokenizer\TokenHandler {
	public function startDocument( Tokenizer\Tokenizer $t, $fns, $fn ) {
		// intentionally empty
	}

	public function endDocument( $pos ) {
		// intentionally empty
	}

	public function error( $text, $pos ) {
		// intentionally empty
	}

	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		// intentionally empty
	}

	public function startTag( $name, Tokenizer\Attributes $attrs, $selfClose,
		$sourceStart, $sourceLength
	) {
		// intentionally empty
	}

	public function endTag( $name, $sourceStart, $sourceLength ) {
		// intentionally empty
	}

	public function doctype( $name, $public, $system, $quirks,
		$sourceStart, $sourceLength
	) {
		// intentionally empty
	}

	public function comment( $text, $sourceStart, $sourceLength ) {
		// intentionally empty
	}
}
/**
 * A TreeBuilder\TreeHandler implementation in which every callback has an
 * empty body.
 *
 * benchmarkTreeBuilder() plugs it in as the tree handler so that the
 * measured run does not include any work done by a real handler.
 */
class NullTreeHandler implements TreeBuilder\TreeHandler {
	public function startDocument( $fns, $fn ) {
		// intentionally empty
	}

	public function endDocument( $pos ) {
		// intentionally empty
	}

	public function characters( $parent, $refNode, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// intentionally empty
	}

	public function insertElement( $parent, $refNode, TreeBuilder\Element $element, $void,
		$sourceStart, $sourceLength
	) {
		// intentionally empty
	}

	public function endTag( TreeBuilder\Element $element, $sourceStart, $sourceLength ) {
		// intentionally empty
	}

	public function doctype( $name, $public, $system, $quirks,
		$sourceStart, $sourceLength
	) {
		// intentionally empty
	}

	public function comment( $parent, $refNode, $text, $sourceStart, $sourceLength ) {
		// intentionally empty
	}

	public function error( $text, $pos ) {
		// intentionally empty
	}

	public function mergeAttributes( TreeBuilder\Element $element,
		Tokenizer\Attributes $attrs, $sourceStart
	) {
		// intentionally empty
	}

	public function removeNode( TreeBuilder\Element $element, $sourceStart ) {
		// intentionally empty
	}

	public function reparentChildren( TreeBuilder\Element $element,
		TreeBuilder\Element $newParent, $sourceStart
	) {
		// intentionally empty
	}
}
/**
 * Run the tokenizer over $text with a TokenSerializer as its handler, then
 * print the serializer's output followed by a newline, and after that one
 * "Error at <entry[1]>: <entry[0]>" line for each entry of getErrors().
 *
 * The tokenizer is constructed with an empty options array; the options
 * passed to execute() are read from $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer (the driver at the
 *   bottom of this script works with file contents)
 */
function reserialize( $text ) {
	$tokenSerializer = new Tokenizer\TokenSerializer();

	( new Tokenizer\Tokenizer( $tokenSerializer, $text, [] ) )
		->execute( $GLOBALS['executeOptions'] );

	echo $tokenSerializer->getOutput(), "\n";

	foreach ( $tokenSerializer->getErrors() as $error ) {
		echo "Error at {$error[1]}: {$error[0]}\n";
	}
}
/**
 * Same reporting as reserialize(), but the tokenizer is executed with an
 * explicit 'state' and 'appropriateEndTag' instead of the global execute
 * options.
 *
 * Prints the TokenSerializer output plus a newline, then one
 * "Error at <entry[1]>: <entry[0]>" line per entry of getErrors().
 *
 * @param mixed $text Input handed to the tokenizer
 * @param mixed $state Value passed as the 'state' execute option (callers
 *   in this file pass Tokenizer::STATE_* constants)
 * @param mixed $endTag Value passed as the 'appropriateEndTag' execute
 *   option (callers in this file pass a tag name string)
 */
function reserializeState( $text, $state, $endTag ) {
	$tokenSerializer = new Tokenizer\TokenSerializer();
	$runOptions = [
		'state' => $state,
		'appropriateEndTag' => $endTag,
	];

	( new Tokenizer\Tokenizer( $tokenSerializer, $text, [] ) )->execute( $runOptions );

	echo $tokenSerializer->getOutput(), "\n";

	foreach ( $tokenSerializer->getErrors() as $error ) {
		echo "Error at {$error[1]}: {$error[0]}\n";
	}
}
/**
 * Shortcut for reserializeState() using Tokenizer::STATE_SCRIPT_DATA as the
 * state and 'script' as the appropriate end tag.
 *
 * @param mixed $text Input handed on to reserializeState()
 */
function reserializeScript( $text ) {
	$scriptState = Tokenizer\Tokenizer::STATE_SCRIPT_DATA;
	$closingTag = 'script';

	reserializeState( $text, $scriptState, $closingTag );
}
/**
 * Shortcut for reserializeState() using Tokenizer::STATE_RCDATA as the
 * state and 'xmp' as the appropriate end tag.
 *
 * NOTE(review): the HTML parsing spec processes <xmp> content with the
 * RAWTEXT state rather than RCDATA - confirm that RCDATA is what is wanted
 * here before relying on the output.
 *
 * @param mixed $text Input handed on to reserializeState()
 */
function reserializeXmp( $text ) {
	$rcdataState = Tokenizer\Tokenizer::STATE_RCDATA;
	$closingTag = 'xmp';

	reserializeState( $text, $rcdataState, $closingTag );
}
/**
 * Parse $text through a pipeline wrapped in tracers and print what happens.
 *
 * Pipeline, from input to output:
 *   Tokenizer -> DispatchTracer -> Dispatcher -> TreeBuilder
 *     -> TreeMutationTracer -> Serializer (HtmlFormatter)
 *
 * Both tracers receive the same callback, which prints each message on a
 * line of its own. After the run, the serializer result is printed followed
 * by a newline. Execute options are read from $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer and the DispatchTracer
 */
function trace( $text ) {
	// Shared sink for both tracers: one message per output line.
	$emitLine = function ( $msg ) {
		print "$msg\n";
	};

	$serializer = new Serializer\Serializer( new Serializer\HtmlFormatter() );
	$mutationTracer = new TreeBuilder\TreeMutationTracer( $serializer, $emitLine );
	$dispatcher = new TreeBuilder\Dispatcher(
		new TreeBuilder\TreeBuilder( $mutationTracer, [] )
	);
	$tokenTracer = new TreeBuilder\DispatchTracer( $text, $dispatcher, $emitLine );

	$tokenizer = new Tokenizer\Tokenizer( $tokenTracer, $text, [] );
	$tokenizer->execute( $GLOBALS['executeOptions'] );

	echo $serializer->getResult(), "\n";
}
/**
 * Parse $text through a traced pipeline whose final handler is a
 * TreeBuilder\DestructTracer.
 *
 * Pipeline, from input to output:
 *   Tokenizer -> DispatchTracer -> Dispatcher -> TreeBuilder
 *     -> TreeMutationTracer -> DestructTracer
 *
 * All three tracers share one callback that prints each message on its own
 * line. Nothing else is printed by this function itself. Execute options
 * are read from $GLOBALS['executeOptions'].
 *
 * Every pipeline object is deliberately held in its own local variable,
 * created in this exact order, so that object lifetimes stay the same as
 * they have always been - presumably relevant to what DestructTracer
 * reports; verify against that class before restructuring.
 *
 * @param mixed $text Input handed to the tokenizer and the DispatchTracer
 */
function traceDestruct( $text ) {
	$emitLine = function ( $msg ) {
		print "$msg\n";
	};

	$destructionSink = new TreeBuilder\DestructTracer( $emitLine );
	$mutationTracer = new TreeBuilder\TreeMutationTracer( $destructionSink, $emitLine );
	$builder = new TreeBuilder\TreeBuilder( $mutationTracer, [] );
	$router = new TreeBuilder\Dispatcher( $builder );
	$tokenTracer = new TreeBuilder\DispatchTracer( $text, $router, $emitLine );
	$lexer = new Tokenizer\Tokenizer( $tokenTracer, $text, [] );

	$lexer->execute( $GLOBALS['executeOptions'] );
}
/**
 * Parse $text and feed the tree builder straight into a
 * Serializer\Serializer using an HtmlFormatter, without printing anything.
 *
 * The error callback given to the serializer is a no-op and the final print
 * of the serializer result is disabled, so the function produces no output
 * of its own. Tokenizer options are read from $GLOBALS['tokenizerOptions']
 * and execute options from $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer
 */
function tidy( $text ) {
	// Error reporting is switched off: the callback body is only a comment.
	$ignoreError = function ( $msg, $pos ) {
		// print " * [$pos] $msg\n";
	};

	$serializer = new Serializer\Serializer( new Serializer\HtmlFormatter(), $ignoreError );
	$dispatcher = new TreeBuilder\Dispatcher(
		new TreeBuilder\TreeBuilder( $serializer, [] )
	);

	$tokenizer = new Tokenizer\Tokenizer( $dispatcher, $text, $GLOBALS['tokenizerOptions'] );
	$tokenizer->execute( $GLOBALS['executeOptions'] );

	// Result output is disabled:
	// print $serializer->getResult() . "\n";
}
/**
 * Parse $text, serialize the resulting tree with a TestFormatter and print
 * the result followed by a newline.
 *
 * Errors passed to the serializer's error callback are printed as they
 * arrive, in the form " * [<pos>] <msg>". Tokenizer options are read from
 * $GLOBALS['tokenizerOptions'] and execute options from
 * $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer
 */
function test( $text ) {
	$reportError = function ( $msg, $pos ) {
		print " * [$pos] $msg\n";
	};

	$serializer = new Serializer\Serializer( new Serializer\TestFormatter(), $reportError );
	$dispatcher = new TreeBuilder\Dispatcher(
		new TreeBuilder\TreeBuilder( $serializer, [] )
	);

	$tokenizer = new Tokenizer\Tokenizer( $dispatcher, $text, $GLOBALS['tokenizerOptions'] );
	$tokenizer->execute( $GLOBALS['executeOptions'] );

	echo $serializer->getResult(), "\n";
}
/**
 * Parse $text into a DOM via DOM\DOMBuilder wrapped in a DOM\DOMSerializer
 * (with an HtmlFormatter), without printing anything.
 *
 * The error callback given to the DOM builder is a no-op and the final
 * print of the serializer result is disabled. Execute options are read from
 * $GLOBALS['executeOptions'].
 *
 * NOTE(review): the tokenizer is constructed with an empty options array
 * here, whereas tidy() passes $GLOBALS['tokenizerOptions'] - confirm whether
 * that difference is intended when comparing the two in a benchmark.
 *
 * @param mixed $text Input handed to the tokenizer
 */
function tidyViaDOM( $text ) {
	// Error reporting is switched off: the callback body is only a comment.
	$ignoreError = function ( $msg, $pos ) {
		// print " * [$pos] $msg\n";
	};

	$htmlFormatter = new Serializer\HtmlFormatter();
	$serializer = new DOM\DOMSerializer( new DOM\DOMBuilder( $ignoreError ), $htmlFormatter );
	$dispatcher = new TreeBuilder\Dispatcher(
		new TreeBuilder\TreeBuilder( $serializer, [] )
	);

	$tokenizer = new Tokenizer\Tokenizer( $dispatcher, $text, [] );
	$tokenizer->execute( $GLOBALS['executeOptions'] );

	// Result output is disabled:
	// print $serializer->getResult() . "\n";
}
/**
 * Parse $text into a DOM via DOM\DOMBuilder wrapped in a DOM\DOMSerializer
 * that uses a TestFormatter.
 *
 * Errors passed to the DOM builder's error callback are printed as they
 * arrive, in the form " * [<pos>] <msg>". The tokenizer is constructed with
 * an empty options array; execute options are read from
 * $GLOBALS['executeOptions'].
 *
 * NOTE(review): unlike test(), the serializer result is not printed - the
 * print statement is commented out. Confirm that this is intended.
 *
 * @param mixed $text Input handed to the tokenizer
 */
function testViaDOM( $text ) {
	$reportError = function ( $msg, $pos ) {
		print " * [$pos] $msg\n";
	};

	$testFormatter = new Serializer\TestFormatter();
	$serializer = new DOM\DOMSerializer( new DOM\DOMBuilder( $reportError ), $testFormatter );
	$dispatcher = new TreeBuilder\Dispatcher(
		new TreeBuilder\TreeBuilder( $serializer, [] )
	);

	$tokenizer = new Tokenizer\Tokenizer( $dispatcher, $text, [] );
	$tokenizer->execute( $GLOBALS['executeOptions'] );

	// Result output is disabled:
	// print $serializer->getResult() . "\n";
}
/**
 * Time one tokenizer + tree builder run over $text with a NullTreeHandler
 * as the tree handler, then print the elapsed microtime(true) difference
 * followed by a newline.
 *
 * The measured span covers construction of the pipeline as well as
 * execute(). Each pipeline object stays in a local variable until the
 * function returns, so tearing the objects down happens after the clock has
 * been read. Tokenizer options are read from $GLOBALS['tokenizerOptions']
 * and execute options from $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer
 */
function benchmarkTreeBuilder( $text ) {
	$startedAt = microtime( true );

	$sink = new NullTreeHandler();
	$builder = new TreeBuilder\TreeBuilder( $sink, [] );
	$router = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $router, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	$time = microtime( true ) - $startedAt;
	print "$time\n";
}
/**
 * Time one tokenizer + tree builder run over $text that builds a DOM
 * through DOM\DOMBuilder, then print the elapsed microtime(true) difference
 * followed by a newline.
 *
 * The tree builder is created with 'ignoreErrors' => true. The measured
 * span covers construction of the pipeline as well as execute(). Each
 * pipeline object stays in a local variable until the function returns, so
 * tearing the objects down happens after the clock has been read. Tokenizer
 * options are read from $GLOBALS['tokenizerOptions'] and execute options
 * from $GLOBALS['executeOptions'].
 *
 * @param mixed $text Input handed to the tokenizer
 */
function benchmarkDOM( $text ) {
	$startedAt = microtime( true );

	$dom = new DOM\DOMBuilder();
	$builder = new TreeBuilder\TreeBuilder( $dom, [ 'ignoreErrors' => true ] );
	$router = new TreeBuilder\Dispatcher( $builder );
	$lexer = new Tokenizer\Tokenizer( $router, $text, $GLOBALS['tokenizerOptions'] );
	$lexer->execute( $GLOBALS['executeOptions'] );

	$time = microtime( true ) - $startedAt;
	print "$time\n";
}
/**
 * Dump every token that Tokenizer\TokenGenerator::generate() yields for
 * $text, using print_r().
 *
 * For tokens whose 'type' is 'text', the 'text' entry is first cut down to
 * the substring described by the token's 'start' and 'length' entries, and
 * those two entries are removed before the token is dumped. All other
 * tokens are dumped unchanged. Tokenizer options are read from
 * $GLOBALS['tokenizerOptions'].
 *
 * @param mixed $text Input handed to the token generator
 */
function generate( $text ) {
	$tokens = Tokenizer\TokenGenerator::generate( $text, $GLOBALS['tokenizerOptions'] );

	foreach ( $tokens as $token ) {
		if ( $token['type'] !== 'text' ) {
			print_r( $token );
			continue;
		}

		// Keep only the referenced slice of the text, then drop the
		// offsets that described it.
		$token['text'] = substr( $token['text'], $token['start'], $token['length'] );
		unset( $token['start'], $token['length'] );
		print_r( $token );
	}
}
/**
 * Time how long it takes to iterate over everything
 * Tokenizer\TokenGenerator::generate() yields for $text, then print the
 * elapsed microtime(true) difference followed by a newline.
 *
 * The tokens themselves are discarded. Tokenizer options are read from
 * $GLOBALS['tokenizerOptions'].
 *
 * @param mixed $text Input handed to the token generator
 */
function benchmarkGenerate( $text ) {
	$startedAt = microtime( true );

	$tokens = Tokenizer\TokenGenerator::generate( $text, $GLOBALS['tokenizerOptions'] );
	foreach ( $tokens as $token ) {
		// Nothing to do: the loop exists only to drain the token stream.
	}

	$time = microtime( true ) - $startedAt;
	print "$time\n";
}
// ---------------------------------------------------------------------------
// Benchmark driver.
//
// Usage: bm.php <input-file>
//
// Reads the input file once, then runs each benchmark ten times and prints
// one timing per iteration under a heading for that benchmark.
// ---------------------------------------------------------------------------

// Stop with a usage message when no input file was given, instead of
// carrying on with an undefined argument.
if ( !isset( $argv[1] ) ) {
	$scriptName = isset( $argv[0] ) ? $argv[0] : 'bm.php';
	fwrite( STDERR, "Usage: $scriptName <input-file>\n" );
	exit( 1 );
}

// file_get_contents() returns false on failure; benchmarking that value
// would produce meaningless timings, so stop here instead.
$text = file_get_contents( $argv[1] );
if ( $text === false ) {
	fwrite( STDERR, "Unable to read input file: {$argv[1]}\n" );
	exit( 1 );
}

// Read through $GLOBALS['tokenizerOptions'] by the functions defined above.
$tokenizerOptions = [
	'ignoreNulls' => true,
	'ignoreCharRefs' => true,
	'ignoreErrors' => true,
	'skipPreprocess' => true,
];

// Read through $GLOBALS['executeOptions'] by the functions defined above.
$executeOptions = [
	// 'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
	// 'fragmentName' => 'div'
];

// Benchmarks that measure themselves and print their own timing line.
$selfTimedBenchmarks = [
	'Tree builder' => 'benchmarkTreeBuilder',
	'DOM' => 'benchmarkDOM',
	'Generate?' => 'benchmarkGenerate',
];

foreach ( $selfTimedBenchmarks as $heading => $benchmark ) {
	print "---- $heading ----\n";
	for ( $i = 0; $i < 10; $i++ ) {
		print "Iteration $i:";
		$benchmark( $text );
	}
}

// Pipelines that print nothing themselves; the whole call is timed here.
$externallyTimedPipelines = [
	'DOM + serialize' => 'tidyViaDOM',
	'SAX + serialize' => 'tidy',
];

foreach ( $externallyTimedPipelines as $heading => $pipeline ) {
	print "---- $heading ----\n";
	for ( $i = 0; $i < 10; $i++ ) {
		print "Iteration $i:";
		$time = -microtime( true );
		$pipeline( $text );
		$time += microtime( true );
		print "$time\n";
	}
}
File Metadata
Details
Attached
Mime Type
text/x-c++
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
7181748
Default Alt Text
bm.php (8 KB)
Attached To
Mode
T204595: Evaluate and document performance of RemexHtml vs Domino
Attached
Detach File
Event Timeline
Log In to Comment