什么是PHP-Parser ?
PHP-Parser入门
use PhpParser\ParserFactory;

// Create a parser through the factory (PHP-Parser 4.x API); PREFER_PHP7 selects
// the PHP 7 grammar as the preferred one.
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
|
|
|
|
|
|
|
|
|
|
通过解析器的parse方法将PHP代码解析成抽象语法树:
<?php
use PhpParser\Error;
use PhpParser\ParserFactory;

require 'vendor/autoload.php';

// Load the PHP source that is going to be parsed.
$code = file_get_contents("./test.php");
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
try {
    // Turn the source text into an AST.
    $ast = $parser->parse($code);
} catch (Error $error) {
    // PhpParser\Error is thrown for source the parser cannot handle.
    echo "Parse error: {$error->getMessage()}\n";
}
//view.php
<?php
require 'vendor/autoload.php';

use PhpParser\Error;
use PhpParser\NodeDumper;
use PhpParser\ParserFactory;

// Read the contents of sample.php.
$code = file_get_contents('sample.php');
// Initialise the parser.
$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);
try {
    // Parse the contents of sample.php into an AST.
    $ast = $parser->parse($code);
} catch (Error $error) {
    echo "Parse error: {$error->getMessage()}\n";
    return;
}
$dumper = new NodeDumper;
// Print the AST in NodeDumper's human-readable text form.
echo $dumper->dump($ast) . "\n";
// Sample input: the function name 'assert' is assembled from two string literals...
$a = 'a'.'ssert';
// ...and then invoked dynamically through the variable, with $_POST['x'] as argument.
$a($_POST['x']);
=======
array(
0: Stmt_Expression(
expr: Expr_Assign(
var: Expr_Variable(
name: a
)
expr: Expr_BinaryOp_Concat(
left: Scalar_String(
value: a
)
right: Scalar_String(
value: ssert
)
)
)
)
1: Stmt_Expression(
expr: Expr_FuncCall(
name: Expr_Variable(
name: a
)
args: array(
0: Arg(
name: null
value: Expr_ArrayDimFetch(
var: Expr_Variable(
name: _POST
)
dim: Scalar_String(
value: x
)
)
byRef: false
unpack: false
)
)
)
)
|
|
|
|
|
|
|
|
|
|
|
|
// Convert the AST back into PHP source text and print it.
// Requires `use PhpParser\PrettyPrinter;` so that PrettyPrinter\Standard resolves.
$prettyPrinter = new PrettyPrinter\Standard;
$prettyCode = $prettyPrinter->prettyPrintFile($ast);
echo $prettyCode;
“PhpParser\NodeVisitor”接口,该接口定义了4个遍历方法:
// Called once before the traversal starts.
public function beforeTraverse(array $nodes);
// Called when a node is entered, before its child nodes are traversed.
public function enterNode(\PhpParser\Node $node);
// Called when the current node is left.
public function leaveNode(\PhpParser\Node $node);
// Called once after the traversal has finished.
public function afterTraverse(array $nodes);
PHP-Parser实战
1.字符二元操作符还原
针对字符串的异或、拼接、与或非等操作进行还原,基础样本如下:
<?php
// Sample input: 'assert' is spelled out one character at a time via string concatenation.
$a = 'a'.'s'.'s'.'e'.'r'.'t';
// The assembled name is then called as a function with $_POST['x'] as argument.
$a($_POST['x']);
?>
首先输出AST进行查看。
array(
0: Stmt_Expression(
expr: Expr_Assign(
var: Expr_Variable(
name: a
)
expr: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Expr_BinaryOp_Concat(
left: Scalar_String(
value: a
)
right: Scalar_String(
value: s
)
)
right: Scalar_String(
value: s
)
)
right: Scalar_String(
value: e
)
)
right: Scalar_String(
value: r
)
)
right: Scalar_String(
value: t
)
)
)
)
1: Stmt_Expression(
expr: Expr_FuncCall(
name: Expr_Variable(
name: a
)
args: array(
0: Arg(
name: null
value: Expr_ArrayDimFetch(
var: Expr_Variable(
name: _POST
)
dim: Scalar_String(
value: x
)
)
byRef: false
unpack: false
)
)
)
)
)
/**
 * Folds the concatenation of two string literals into a single string literal.
 *
 * The check runs in leaveNode(), i.e. when a node is left, so a left-nested
 * chain such as 'a'.'s'.'s' is reduced one Concat node at a time.
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class BinaryOpReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Scalar\String_|null Merged literal, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if ($node instanceof Node\Expr\BinaryOp\Concat
            && $node->left instanceof Node\Scalar\String_
            && $node->right instanceof Node\Scalar\String_
        ) {
            return new Node\Scalar\String_($node->left->value . $node->right->value);
        }

        return null;
    }
}
<?php
$a = 'assert';
$a($_POST['x']);
- 判断当前节点是否为“Scalar\String_”;
- 将节点的“value”值进行“base64_encode”编码;
- 替换原节点为“FuncCall”类型;
/**
 * Replaces every string literal with a call to base64_decode() on the
 * base64-encoded form of that literal, e.g. "Threatbook" becomes
 * base64_decode('VGhyZWF0Ym9vaw==').
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class Base64Reducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Expr\FuncCall|null Replacement call, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Scalar\String_) {
            return null;
        }

        $encoded = base64_encode($node->value);

        return new Node\Expr\FuncCall(
            new Node\Name("base64_decode"),
            [new Node\Arg(new Node\Scalar\String_($encoded))]
        );
    }
}
<?php
// Sample input: a plain string literal assigned to a variable.
$str = "Threatbook";
?>
--After parser:--
$str = base64_decode('VGhyZWF0Ym9vaw==');
- 筛选所有“Variable”类型的节点;
- 通过正则表达式匹配出乱码变量,这种变量名包含字母、数字、下划线以外的字符;
- 通过一个数组存放重命名的变量名,如果某个乱码变量再次出现,通过数组查询新的变量名进行替换。
代码如下:
/**
 * Variable renaming: gives every variable whose name is not made up solely of
 * [a-zA-Z0-9_] a readable name of the form v_0, v_1, ...
 *
 * The same original name always maps to the same new name.
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ReNameVariable extends NodeVisitorAbstract
{
    /** @var int Sequence number that the next newly seen name receives. */
    public $Count = 0;

    /** @var array Map of original variable name => assigned sequence number. */
    public $NewName = [];

    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Expr\Variable|null Renamed variable, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Variable) {
            return null;
        }

        $name = $node->name;
        // A variable-variable ($$x) carries an expression node instead of a
        // string name; it cannot be matched against the pattern, so skip it.
        if (!is_string($name) || preg_match('/^[a-zA-Z0-9_]+$/', $name)) {
            return null;
        }

        // First occurrence: reserve the next sequence number for this name.
        if (!isset($this->NewName[$name])) {
            $this->NewName[$name] = $this->Count++;
        }

        return new Node\Expr\Variable('v_' . $this->NewName[$name]);
    }
}
可以看到原本的不可见变量名已经被重命名成了“v_“格式的变量。同时可以观察到“GLOBALS“变量的键名也是乱码字符,借鉴变量名重命名的思路对所有”GLOBALS“数组的键名进行重命名:
- 筛选所有的“ArrayDimFetch”类型节点,且代码样式为“$GLOBALS[XX][X]”,对“$node->var”和“$node->dim”也进行判断;
- 通过正则表达式匹配出乱码数组键名,这种键名包含字母、数字、下划线以外的字符;
- 通过一个数组存放重命名的键名,如果某个乱码键名再次出现,通过数组查询新的键名进行替换。
和上面不同的是我们恢复的是二维数组,所以要多包含一层判断:
/**
 * Renames array keys that are not made up solely of [a-zA-Z0-9_] to
 * arr_0, arr_1, ... Numbering is kept separately per array variable, and the
 * same key of the same array always maps to the same new key.
 *
 * Only accesses of the shape $name['literal key'] are rewritten; the outer
 * level of a nested access such as $GLOBALS[XX][X] is left alone.
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ReNameArrayKeyValue extends NodeVisitorAbstract
{
    /** @var array Map of array variable name => next sequence number. */
    private $Count = [];

    /** @var array Map of array variable name => (original key => sequence number). */
    private $NewName = [];

    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Expr\ArrayDimFetch|null Access with the renamed key, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\ArrayDimFetch) {
            return null;
        }

        $array = $node->var;
        $dim = $node->dim;
        // Require a plainly named variable and a string-literal key. This
        // skips nested fetches as well as $a[] (dim is null), $a[$i] and
        // $obj->prop['k'], none of which offer a usable name/key pair.
        if (!$array instanceof Node\Expr\Variable
            || !is_string($array->name)
            || !$dim instanceof Node\Scalar\String_
        ) {
            return null;
        }

        $name = $array->name;
        $key = $dim->value;
        if (preg_match('/^[a-zA-Z0-9_]+$/', $key)) {
            return null;
        }

        if (!isset($this->NewName[$name][$key])) {
            // First garbled key seen for this array: start its counter.
            if (!isset($this->Count[$name])) {
                $this->Count[$name] = 0;
                $this->NewName[$name] = [];
            }
            $this->NewName[$name][$key] = $this->Count[$name]++;
        }

        return new Node\Expr\ArrayDimFetch(
            new Node\Expr\Variable($name),
            new Node\Scalar\String_('arr_' . $this->NewName[$name][$key])
        );
    }
}
- 筛选“FuncCall”节点,判断节点的“$node->expr->name->parts[0]”是否为“unserialize”,节点“$node->expr->args[0]->value->name->parts[0]”的值是否为“base64_decode”;
- 筛选“FuncCall”节点,判断节点的“$node->expr->name->value”是否为“unserialize”,节点“$node->expr->args[0]->value->name->value”的值是否为“base64_decode”。之所以需要这种判断,是因为上图中第二次还原后的调用形式为“('unserialize')(('base64_decode')('xxx'))”,PHP支持以字符串形式调用函数,此时函数名在AST中会被解析为“String_”节点;
- 获取加密的值,直接返回“unserialize(base64_decode(密文))”的值;
- 同时,还原数组时需要判断“GLOBALS”的值是否存在。
/**
 * Inlines the entries of a lookup table that the sample builds with
 * unserialize(base64_decode('...')).
 *
 * enterNode() detects the assignment `<target> = unserialize(base64_decode('...'))`
 * (also in the string-call form ('unserialize')(('base64_decode')('...'))),
 * decodes the table and replaces the right-hand side with 0.
 * leaveNode() then replaces every `<target>[key]` access whose key exists in
 * the decoded table with the corresponding literal.
 *
 * The target is compared by its pretty-printed source text, so it may be a
 * plain variable ($t) as well as an array element ($GLOBALS['k']).
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ArrayToConstant extends NodeVisitorAbstract
{
    /** @var string Name of the assigned variable if the target is a plain variable, '' otherwise. */
    public $variableName = '';

    /** @var array Table decoded from the last matching assignment. */
    public $constants = [];

    /** @var string|null Pretty-printed assignment target; null until a table has been found. */
    private $targetCode = null;

    /** @var \PhpParser\PrettyPrinter\Standard|null Lazily created printer used to compare targets. */
    private $printer = null;

    /**
     * @param Node $node Node that is being entered.
     *
     * @return Node\Expr\Assign|null `<target> = 0` for a matching assignment, null otherwise.
     */
    public function enterNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Assign) {
            return null;
        }

        $payload = $this->encodedPayload($node->expr);
        if ($payload === null) {
            return null;
        }

        // NOTE(security): the payload comes from the file under analysis.
        // Object instantiation is disabled so that unserialize() cannot
        // trigger object injection inside the analysis tool.
        $table = unserialize(base64_decode($payload), ['allowed_classes' => false]);
        if (!is_array($table)) {
            return null;
        }

        $target = $node->var;
        $this->variableName = ($target instanceof Node\Expr\Variable && is_string($target->name))
            ? $target->name
            : '';
        $this->targetCode = $this->render($target);
        $this->constants = $table;

        return new Node\Expr\Assign($target, Node\Scalar\LNumber::fromString('0'));
    }

    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Expr|null Literal taken from the table, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if ($this->targetCode === null || !$node instanceof Node\Expr\ArrayDimFetch) {
            return null;
        }

        $dim = $node->dim;
        if (!$dim instanceof Node\Scalar\String_ && !$dim instanceof Node\Scalar\LNumber) {
            return null;
        }
        if ($this->render($node->var) !== $this->targetCode) {
            return null;
        }

        // Leave the access untouched when the table has no (non-null) entry for this key.
        if (!isset($this->constants[$dim->value])) {
            return null;
        }
        $val = $this->constants[$dim->value];

        if (is_string($val)) {
            return new Node\Scalar\String_($val);
        }
        if (is_float($val)) {
            return new Node\Scalar\DNumber($val);
        }
        if (is_int($val)) {
            return new Node\Scalar\LNumber($val);
        }

        // Any other value is emitted as a constant fetch named after its JSON form
        // (true / false for booleans).
        return new Node\Expr\ConstFetch(new Node\Name\FullyQualified(json_encode($val)));
    }

    /**
     * Returns the string literal X of `unserialize(base64_decode(X))`, or null
     * when $expr does not have exactly that shape.
     */
    private function encodedPayload($expr)
    {
        if (!$expr instanceof Node\Expr\FuncCall
            || $this->calleeName($expr) !== 'unserialize'
            || count($expr->args) !== 1
            || !$expr->args[0] instanceof Node\Arg
        ) {
            return null;
        }

        $inner = $expr->args[0]->value;
        if (!$inner instanceof Node\Expr\FuncCall
            || $this->calleeName($inner) !== 'base64_decode'
            || !isset($inner->args[0])
            || !$inner->args[0] instanceof Node\Arg
        ) {
            return null;
        }

        $literal = $inner->args[0]->value;

        return $literal instanceof Node\Scalar\String_ ? $literal->value : null;
    }

    /**
     * Returns the called function's name for both call forms, f(...) and
     * ('f')(...), or null when the callee is any other expression.
     */
    private function calleeName(Node\Expr\FuncCall $call)
    {
        $callee = $call->name;
        if ($callee instanceof Node\Name) {
            return $callee->toString();
        }
        if ($callee instanceof Node\Scalar\String_) {
            return $callee->value;
        }

        return null;
    }

    /** Pretty-prints an expression so that two targets can be compared as text. */
    private function render(Node\Expr $expr)
    {
        if ($this->printer === null) {
            $this->printer = new \PhpParser\PrettyPrinter\Standard();
        }

        return $this->printer->prettyPrintExpr($expr);
    }
}
/**
 * Folds arithmetic of the shape `a + (b - c)` into a single number literal.
 *
 * a, b and c must be integer literals or numeric string literals; a may
 * additionally be negated (`-a`). Anything else is left untouched.
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ExpressionToNumber extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Scalar|null Folded number literal, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Plus
            || !$node->right instanceof Node\Expr\BinaryOp\Minus
        ) {
            return null;
        }

        $a = $this->numericValue($node->left, true);
        $b = $this->numericValue($node->right->left, false);
        $c = $this->numericValue($node->right->right, false);
        if ($a === null || $b === null || $c === null) {
            return null;
        }

        $result = $a + $b - $c;

        // Numeric strings such as '1.5' (or an integer overflow) yield a float.
        return is_int($result)
            ? new Node\Scalar\LNumber($result)
            : new Node\Scalar\DNumber($result);
    }

    /**
     * Returns the numeric value of a literal operand, or null if the operand
     * is not an integer literal / numeric string literal.
     *
     * @param Node $operand       Operand node to evaluate.
     * @param bool $allowNegation Whether a leading unary minus is accepted.
     */
    private function numericValue(Node $operand, $allowNegation)
    {
        $sign = 1;
        if ($allowNegation && $operand instanceof Node\Expr\UnaryMinus) {
            $sign = -1;
            $operand = $operand->expr;
        }

        if ($operand instanceof Node\Scalar\LNumber) {
            return $sign * $operand->value;
        }
        // Non-numeric strings are rejected: arithmetic on them raises a
        // warning or a TypeError, depending on the PHP version.
        if ($operand instanceof Node\Scalar\String_ && is_numeric($operand->value)) {
            return $sign * ($operand->value + 0);
        }

        return null;
    }
}
/**
 * Replaces chr(<integer literal>) with the one-character string literal it
 * evaluates to. Both call forms are handled: chr(97) and ('chr')(97).
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ChrReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Scalar\String_|null Resulting literal, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall || count($node->args) !== 1) {
            return null;
        }

        // The callee is a Name node for chr(..) and a String_ node for ('chr')(..).
        $callee = $node->name;
        if ($callee instanceof Node\Scalar\String_) {
            $function = $callee->value;
        } elseif ($callee instanceof Node\Name) {
            $function = $callee->toString();
        } else {
            return null;
        }

        $arg = $node->args[0];
        if ($function !== 'chr'
            || !$arg instanceof Node\Arg
            || !$arg->value instanceof Node\Scalar\LNumber
        ) {
            return null;
        }

        return new Node\Scalar\String_(chr($arg->value->value));
    }
}
/**
 * Merges the concatenation of two string literals into one string literal.
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class ConcatReducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Scalar\String_|null Merged literal, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Concat) {
            return null;
        }

        $left = $node->left;
        $right = $node->right;
        if ($left instanceof Node\Scalar\String_ && is_string($left->value)
            && $right instanceof Node\Scalar\String_ && is_string($right->value)
        ) {
            return new Node\Scalar\String_($left->value . $right->value);
        }

        return null;
    }
}
/**
 * Replaces str_rot13(<string literal>) with the decoded string literal.
 * Both call forms are handled: str_rot13('..') and ('str_rot13')('..').
 *
 * Expects `use PhpParser\Node;` and `use PhpParser\NodeVisitorAbstract;`.
 */
class Rot13Reducer extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node that is being left.
     *
     * @return Node\Scalar\String_|null Decoded literal, or null to keep the node as it is.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall || count($node->args) !== 1) {
            return null;
        }

        // The callee is a Name node for str_rot13(..) and a String_ node for ('str_rot13')(..).
        $callee = $node->name;
        if ($callee instanceof Node\Scalar\String_) {
            $function = $callee->value;
        } elseif ($callee instanceof Node\Name) {
            $function = $callee->toString();
        } else {
            return null;
        }

        $arg = $node->args[0];
        if ($function !== 'str_rot13'
            || !$arg instanceof Node\Arg
            || !$arg->value instanceof Node\Scalar\String_
        ) {
            return null;
        }

        return new Node\Scalar\String_(str_rot13($arg->value->value));
    }
}
结语
参考链接
—End—
点击下方,关注我们
第一时间获取最新的威胁情报
原文始发于微信公众号(微步在线研究响应中心):PHP反混淆实战 | 手把手带你入门PHP-Parser