今天图老师小编要向大家分享个用C实现PHP扩展 Fetch_Url 类数据抓取的方法教程,过程简单易学,相信聪明的你一定能轻松get!
【 tulaoshi.com - 编程语言 】
一、类文档说明
代码如下:
/**
 * Signature summary of the FetchUrl class exported by the fetch_url extension.
 *
 * This is a reference listing only: the methods are implemented in C in
 * fetch_url.c, so no bodies appear here.
 */
class FetchUrl{
    function __construct();

    // Returns the response body; commonly used after fetch() returns false.
    function body();

    // Re-initialises the object's request/response data so that one FetchUrl
    // object can be reused for several requests.
    function clean();

    // Returns the error message.
    function errmsg();

    // Returns the error code; 0 means that no error has been recorded.
    function errcode();

    /**
     * Sends the request.
     *
     * $url      string   request address
     * $callback callable optional anonymous function (the implementation
     *                    accepts it but does not invoke it)
     */
    function fetch(string $url, callable $callback = null);

    // Returns the HTTP status code of the response.
    function httpCode();

    // Returns the response cookies as an array.
    function responseCookies();

    // Returns the response header lines as an array.
    function responseHeaders();

    // Whether HTTP redirects are allowed; the argument defaults to false.
    function setAllowRedirect(bool $allow=false);

    // Sets the connect timeout.
    function setConnectTimeout(int $seconds=5);

    // Adds one cookie to the request.
    function setCookie(string $name, string $value);

    // Adds several cookies (name => value) to the request.
    function setCookies(array $cookies);

    // Adds a request header: either one name/value pair, or an array of
    // name => value pairs passed as the first argument.
    function setHeader($name, string $value = null);

    // Sets the request method (POST/GET).
    function setMethod(string $method="get");

    // Sets the data sent with a POST request.
    function setPostData(array $data);

    // Uploads in-memory binary data as a file in form field $field.
    function setBinary(string $field, string $filename, string $binary);

    // Uploads the file at $path in form field $field.
    function setFile(string $field, string $path);

    // Sets the read timeout.
    function setReadTimeout(int $seconds=60);

    function __destruct();
}
二、使用案例
代码如下:
<?php
/* GET request: fetch http://www.baidu.com */
/*
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->fetch('http://www.baidu.com');
*/

// Cookies shared by the POST examples below.
$cookies = array(
    'wei_xin_wb_session' => 'value',
    'wei_xin_wxblog_authcoder' => 'value',
);

/* POST request: submit form data */
/*
$fetch_url = new FetchUrl();
$fetch_url->setMethod('post');
$data = array(
    'step' => 2,
    'pays[1]' => 0,
    'pays[2]' => 0,
    'pays[3]' => 0,
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);
$fetch_url->fetch('http://test.wx.pp.cc/wb_advs/manage?inajax=1');
*/

// POST request: upload form data together with a file.
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->setMethod('post');
$data = array(
    'nickname' => '挺好a',
    'wxnickname' => 'good',
    'wxusername' => 'good',
    'intro' => 'good',
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);

$binary = file_get_contents("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif");
// file_get_contents() returns false on failure; only attach the upload when
// the download actually produced data.
if ($binary !== false) {
    $fetch_url->setBinary("picfile", "demo.jpg", $binary); // upload in-memory binary data
}
// $fetch_url->setFile("picfile", "C:/Users/Administrator/Desktop/123.jpg"); // upload a file from disk

// errcode() is 0 while none of the setters above has recorded an error.
if ($fetch_url->errcode() === 0) {
    $fetch_url->fetch('http://wx.pp.cc/wb_ajax/addwxuser/0');
    // httpCode() returns the status as a string (or false), hence the cast.
    if ((int) $fetch_url->httpCode() === 200) {
        $html = $fetch_url->body();
        echo $html;
    }
} else {
    echo "errmsg:" . $fetch_url->errmsg() . ", errcode:" . $fetch_url->errcode();
}

// Print the response headers.
print_r($fetch_url->responseHeaders());

// Clear the previous request settings and reuse $fetch_url.
$fetch_url->clean();
$fetch_url->fetch("http://www.baidu.com");
print_r($fetch_url->responseHeaders());
三、扩展实现
1.php_fetch_url.h
代码如下:
/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2012 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifndef PHP_FETCH_URL_H
#define PHP_FETCH_URL_H

extern zend_module_entry fetch_url_module_entry;
#define phpext_fetch_url_ptr &fetch_url_module_entry

/* Symbol-export decoration for the platform being built. */
#ifdef PHP_WIN32
# define PHP_FETCH_URL_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4
# define PHP_FETCH_URL_API __attribute__ ((visibility("default")))
#else
# define PHP_FETCH_URL_API
#endif

/* Flags handed to curl_global_init(). */
#ifdef PHP_WIN32
#define FETCH_CURL_MODE CURL_GLOBAL_WIN32
#else
#define FETCH_CURL_MODE CURL_GLOBAL_ALL
#endif

#ifdef ZTS
#include "TSRM.h"
#endif

#define FETCH_CLASS_NAME "FetchUrl"
#define FETCH_CLASS_CE g_fetch_ce

/* Expands to the "scope, object" argument pair of the zend_*_property()
 * functions; only usable inside a method body where getThis() is valid. */
#define FETCH_THIS Z_OBJCE_P(getThis()), getThis()

/*
 * Records an error on the current object: stores `msg` in the "errmsg"
 * property and `code` in the "errno" property.
 *
 * `msg` MUST be a string literal, because its length is taken with sizeof.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else); terminate the invocation with
 * a semicolon.
 */
#define FETCH_ERROR(msg, code) \
	do { \
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), msg, sizeof(msg)-1 TSRMLS_CC); \
		zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), code TSRMLS_CC); \
	} while (0)

PHP_MINIT_FUNCTION(fetch_url);
PHP_MSHUTDOWN_FUNCTION(fetch_url);
PHP_RINIT_FUNCTION(fetch_url);
PHP_RSHUTDOWN_FUNCTION(fetch_url);
PHP_MINFO_FUNCTION(fetch_url);

/* Accessor for module globals.
 * NOTE(review): no fetch_url globals are declared in the code shown here, so
 * this macro appears to be unused skeleton residue - confirm before use. */
#ifdef ZTS
#define FETCH_URL_G(v) TSRMG(fetch_url_globals_id, zend_fetch_url_globals *, v)
#else
#define FETCH_URL_G(v) (fetch_url_globals.v)
#endif

#endif /* PHP_FETCH_URL_H */
2.fetch_url.c
代码如下:
/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2012 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_ini.h"
#include "main/SAPI.h"
#include "Zend/zend_interfaces.h"
#include "ext/standard/info.h"
#include "ext/standard/php_var.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_smart_str.h"
#include "ext/standard/url.h"
#include "ext/pcre/php_pcre.h"
#include "php_fetch_url.h"
#include <string.h>
#include <curl/curl.h>
/* Class entry of FetchUrl; assigned when the class is registered. */
zend_class_entry *g_fetch_ce;

/*
 * Argument descriptions used when the methods are registered.
 * The last ZEND_BEGIN_ARG_INFO_EX argument is the number of REQUIRED
 * arguments; it is kept in step with what each method actually parses.
 */

/* Methods that take no arguments. */
ZEND_BEGIN_ARG_INFO_EX(void_arginfo, 0, 0, 0)
ZEND_END_ARG_INFO()

/* fetch(url [, callback]) */
ZEND_BEGIN_ARG_INFO_EX(fetch_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, url)
	ZEND_ARG_INFO(0, callback)
ZEND_END_ARG_INFO()

/* responseCookies([all]) */
ZEND_BEGIN_ARG_INFO_EX(responseCookies_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, all)
ZEND_END_ARG_INFO()

/* responseHeaders([parse]) */
ZEND_BEGIN_ARG_INFO_EX(responseHeaders_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, parse)
ZEND_END_ARG_INFO()

/* setAllowRedirect([allow]) */
ZEND_BEGIN_ARG_INFO_EX(setAllowRedirect_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, allow)
ZEND_END_ARG_INFO()

/* setConnectTimeout([seconds]) - the value is passed to
 * CURLOPT_CONNECTTIMEOUT, which is expressed in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setConnectTimeout_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* setCookie(name, value) */
ZEND_BEGIN_ARG_INFO_EX(setCookie_arginfo, 0, 0, 2)
	ZEND_ARG_INFO(0, name)
	ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

/* setCookies(cookies) */
ZEND_BEGIN_ARG_INFO_EX(setCookies_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, cookies)
ZEND_END_ARG_INFO()

/* setHeader(name [, value]) - only the first argument is required because
 * the method parses "z|z" and also accepts a single array of headers. */
ZEND_BEGIN_ARG_INFO_EX(setHeader_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, name)
	ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

/* setMethod(method) */
ZEND_BEGIN_ARG_INFO_EX(setMethod_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, method)
ZEND_END_ARG_INFO()

/* setPostData(post_data [, multil])
 * NOTE(review): this table lacks the "_arginfo" suffix used by its siblings;
 * the name is kept because the method table refers to it as "setPostData". */
ZEND_BEGIN_ARG_INFO_EX(setPostData, 0, 0, 1)
	ZEND_ARG_INFO(0, post_data)
	ZEND_ARG_INFO(0, multil)
ZEND_END_ARG_INFO()

/* setReadTimeout([seconds]) - the value is passed to CURLOPT_TIMEOUT, which
 * is expressed in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setReadTimeout_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* setBinary(post_filed, uploadfile_name, binary_data) - the third argument
 * is the raw file content, not a URL. */
ZEND_BEGIN_ARG_INFO_EX(setBinary_arginfo, 0, 0, 3)
	ZEND_ARG_INFO(0, post_filed)
	ZEND_ARG_INFO(0, uploadfile_name)
	ZEND_ARG_INFO(0, binary_data)
ZEND_END_ARG_INFO()

/* setFile(post_filed, path) */
ZEND_BEGIN_ARG_INFO_EX(setFile_arginfo, 0, 0, 2)
	ZEND_ARG_INFO(0, post_filed)
	ZEND_ARG_INFO(0, path)
ZEND_END_ARG_INFO()
/* FetchUrl::__construct() - intentionally empty; the constructor performs no
 * work of its own. */
ZEND_METHOD(fetch_url, __construct)
{
}
ZEND_METHOD(fetch_url, setBinary){
zval *input_filed_name, *binary_data, *uploadfile_name;
zval *g_binary_data, *item_data;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &input_filed_name, &uploadfile_name, &binary_data) == FAILURE){
RETURN_FALSE;
}
if(Z_TYPE_P(input_filed_name) != IS_STRING || Z_TYPE_P(uploadfile_name) != IS_STRING || Z_TYPE_P(binary_data) != IS_STRING){
RETURN_FALSE;
}
g_binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);
if(Z_TYPE_P(g_binary_data) == IS_NULL){
MAKE_STD_ZVAL(g_binary_data);
array_init(g_binary_data);
}
MAKE_STD_ZVAL(item_data);
array_init(item_data);
add_index_stringl(item_data, 0, Z_STRVAL_P(uploadfile_name), Z_STRLEN_P(uploadfile_name), 1);
add_index_stringl(item_data, 1, Z_STRVAL_P(binary_data), Z_STRLEN_P(binary_data), 1);
add_assoc_zval(g_binary_data, Z_STRVAL_P(input_filed_name), item_data);
zend_update_property(FETCH_THIS, ZEND_STRL("binary_data"), g_binary_data TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setFile){
zval *file_path, *input_filed_name;
zval *upload_filepaths;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &input_filed_name, &file_path) == FAILURE){
RETURN_FALSE;
}
if(Z_TYPE_P(file_path) != IS_STRING || Z_TYPE_P(input_filed_name) != IS_STRING){
RETURN_FALSE;
}
upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);
if(Z_TYPE_P(upload_filepaths) == IS_NULL){
MAKE_STD_ZVAL(upload_filepaths);
array_init(upload_filepaths);
}
add_assoc_stringl(upload_filepaths, Z_STRVAL_P(input_filed_name), Z_STRVAL_P(file_path), Z_STRLEN_P(file_path), 1);
zend_update_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), upload_filepaths TSRMLS_CC);
}
/* FetchUrl::body() - returns a copy of the string held in the "body"
 * property. */
ZEND_METHOD(fetch_url, body)
{
	zval *stored = zend_read_property(FETCH_THIS, ZEND_STRL("body"), 0 TSRMLS_CC);

	RETVAL_STRINGL(Z_STRVAL_P(stored), Z_STRLEN_P(stored), 1);
}
/*
 * FetchUrl::clean()
 *
 * Resets the response data (body, headers, httpCode), the error state and
 * the per-request inputs (cookies, headers to send, post data, uploads,
 * method) so the object can be reused for another request.
 */
ZEND_METHOD(fetch_url, clean)
{
	/* Response and error state. */
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("httpCode") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), ZEND_STRL("") TSRMLS_CC);

	/* Request inputs. */
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), ZEND_STRL("") TSRMLS_CC);
	/* "send_headers" is either NULL or an array everywhere else in this
	 * file; resetting it to a string made a later setHeader() call treat a
	 * string zval as an array. */
	zend_update_property_null(FETCH_THIS, ZEND_STRL("send_headers") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("binary_data") TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("upload_filepaths") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
}
ZEND_METHOD(fetch_url, errmsg){
zval *errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);
RETURN_STRINGL(Z_STRVAL_P(errmsg), Z_STRLEN_P(errmsg), 1);
}
/* FetchUrl::errcode() - returns the integer held in the "errno" property. */
ZEND_METHOD(fetch_url, errcode)
{
	zval *code = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);

	RETVAL_LONG(Z_LVAL_P(code));
}
/*
 * libcurl write/header callback: appends the received chunk of
 * size * nmemb bytes to the smart_str passed as `data` and reports the whole
 * chunk as consumed.
 */
static size_t read_data(void *buffer, size_t size, size_t nmemb, void *data)
{
	const size_t chunk_len = size * nmemb;
	smart_str *sink = (smart_str *) data;

	smart_str_appendl(sink, buffer, chunk_len);
	return chunk_len;
}
ZEND_METHOD(fetch_url, fetch){
CURLcode return_code;
CURL *curl_handler;
struct curl_slist *http_headers = NULL;
zval *url, *callback, *cookies, *connect_timeout, *allow_redirect, *method, *post_data, *read_timeout,
*send_headers, *err_no, *errmsg, *binary_data, *upload_filepaths;
smart_str body_str = {0}, header_str = {0};
struct curl_httppost *post = NULL;
struct curl_httppost *last = NULL;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &url, &callback) == FAILURE){
RETURN_FALSE;
}
err_no = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);
if(Z_LVAL_P(err_no) 0){
php_printf("errno:%d, errmsg:%s", Z_LVAL_P(err_no), Z_STRVAL_P(errmsg));
RETURN_FALSE;
}
if(Z_TYPE_P(url) != IS_STRING){
FETCH_ERROR("fetch url must be string.", 500);
RETURN_FALSE;
}
return_code = curl_global_init(FETCH_CURL_MODE);
if(return_code != CURLE_OK){
curl_global_cleanup();
FETCH_ERROR("curl init failed.", 500);
RETURN_FALSE;
}
curl_handler = curl_easy_init();
if(NULL == curl_handler){
curl_easy_cleanup(curl_handler);
curl_global_cleanup();
FETCH_ERROR("get curl handler failed.", 500);
RETURN_FALSE;
}
cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
connect_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("connect_timeout"), 0 TSRMLS_CC);
read_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("read_timeout"), 0 TSRMLS_CC);
allow_redirect = zend_read_property(FETCH_THIS, ZEND_STRL("allow_redirect"), 0 TSRMLS_CC);
method = zend_read_property(FETCH_THIS, ZEND_STRL("method"), 0 TSRMLS_CC);
post_data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);
send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);
binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);
upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);
curl_easy_setopt(curl_handler, CURLOPT_URL, Z_STRVAL_P(url));
curl_easy_setopt(curl_handler, CURLOPT_COOKIE, Z_STRVAL_P(cookies));
curl_easy_setopt(curl_handler, CURLOPT_WRITEFUNCTION, &read_data);
curl_easy_setopt(curl_handler, CURLOPT_WRITEDATA, &body_str);
curl_easy_setopt(curl_handler, CURLOPT_HEADERDATA, &header_str);
curl_easy_setopt(curl_handler, CURLOPT_HEADERFUNCTION, &read_data);
curl_easy_setopt(curl_handler, CURLOPT_TIMEOUT, Z_LVAL_P(read_timeout));
curl_easy_setopt(curl_handler, CURLOPT_CONNECTTIMEOUT, Z_LVAL_P(connect_timeout));
curl_easy_setopt(curl_handler, CURLOPT_AUTOREFERER, Z_LVAL_P(allow_redirect));
curl_easy_setopt(curl_handler, CURLOPT_MAXREDIRS, 5);
if(strcmp(Z_STRVAL_P(method), "get") == 0){
curl_easy_setopt(curl_handler, CURLOPT_HTTPGET, 1);
}else{
if(Z_TYPE_P(binary_data) != IS_NULL || Z_TYPE_P(upload_filepaths) != IS_NULL){
zval *delim, *post_arr, *delim_equal;
MAKE_STD_ZVAL(delim_equal);
MAKE_STD_ZVAL(delim);
MAKE_STD_ZVAL(post_arr);
ZVAL_STRING(delim, "&", 1);
ZVAL_STRING(delim_equal, "=", 1);
array_init(post_arr);
php_explode(delim, post_data, post_arr, LONG_MAX);
for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(post_arr));
zend_hash_has_more_elements(Z_ARRVAL_P(post_arr)) == SUCCESS;
zend_hash_move_forward(Z_ARRVAL_P(post_arr))){
zval **data_str;
zval *temp_data, **post_data_name, **post_data_value, *temp_zval;
if(zend_hash_get_current_data(Z_ARRVAL_P(post_arr), (void**)&data_str) == FAILURE){
continue;
}
if(Z_STRLEN_PP(data_str) 0){
MAKE_STD_ZVAL(temp_data);
array_init(temp_data);
temp_zval = *data_str;
php_explode(delim_equal, temp_zval, temp_data, LONG_MAX);
zend_hash_index_find(Z_ARRVAL_P(temp_data), 0, (void**)&post_data_name);
zend_hash_index_find(Z_ARRVAL_P(temp_data), 1, (void**)&post_data_value);
curl_formadd(&post, &last, CURLFORM_COPYNAME, Z_STRVAL_PP(post_data_name), CURLFORM_COPYCONTENTS, Z_STRVAL_PP(post_data_value), CURLFORM_END);
zval_dtor(temp_data);
}
}
zval_dtor(post_arr);
zval_dtor(delim);
zval_dtor(delim_equal);
if(Z_TYPE_P(binary_data) != IS_NULL)
for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(binary_data));
zend_hash_has_more_elements(Z_ARRVAL_P(binary_data)) == SUCCESS;
zend_hash_move_forward(Z_ARRVAL_P(binary_data))){
char *input_file_name;
uint input_file_name_len;
ulong idx;
zval **item_data;
zval **upload_binary_data;
zval **uploadfile_name;
if(zend_hash_get_current_key_ex(Z_ARRVAL_P(binary_data), &input_file_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(Z_ARRVAL_P(binary_data), (void**)&item_data) == FAILURE){
continue;
}
zend_hash_index_find(Z_ARRVAL_PP(item_data), 0, (void**)&uploadfile_name);
zend_hash_index_find(Z_ARRVAL_PP(item_data), 1, (void**)&upload_binary_data);
curl_formadd(&post,
&last,
CURLFORM_COPYNAME,
input_file_name,
CURLFORM_BUFFER,
Z_STRVAL_PP(uploadfile_name), //todo:setBinary需要传递文件名参数
CURLFORM_BUFFERPTR,
Z_STRVAL_PP(upload_binary_data),
CURLFORM_BUFFERLENGTH,
Z_STRLEN_PP(upload_binary_data),
CURLFORM_END
);
}
if(Z_TYPE_P(upload_filepaths) != IS_NULL)
for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(upload_filepaths));
zend_hash_has_more_elements(Z_ARRVAL_P(upload_filepaths)) == SUCCESS;
zend_hash_move_forward(Z_ARRVAL_P(upload_filepaths))){
char *input_filed_name;
uint input_file_name_len;
ulong idx;
zval **file_path;
if(zend_hash_get_current_key_ex(Z_ARRVAL_P(upload_filepaths), &input_filed_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(Z_ARRVAL_P(upload_filepaths), (void**)&file_path) == FAILURE){
continue;
}
curl_formadd(&post, &last, CURLFORM_COPYNAME, input_filed_name, CURLFORM_FILE, Z_STRVAL_PP(file_path), CURLFORM_END);
}
curl_easy_setopt(curl_handler, CURLOPT_HTTPPOST, post);
http_headers = curl_slist_append(http_headers, estrdup("Expect:"));//防止出现HTTP 100跳转
}else{
curl_easy_setopt(curl_handler, CURLOPT_POSTFIELDS, Z_STRVAL_P(post_data));
curl_easy_setopt(curl_handler, CURLOPT_POST, 1);
}
}
if(Z_TYPE_P(send_headers) == IS_ARRAY && zend_hash_num_elements(Z_ARRVAL_P(send_headers)) 0){
for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(send_headers));
zend_hash_has_more_elements(Z_ARRVAL_P(send_headers)) == SUCCESS;
zend_hash_move_forward(Z_ARRVAL_P(send_headers))){
char *header_key;
uint header_keylen;
ulong idx;
zval **header_val;
smart_str impl_headers = {0};
if(zend_hash_get_current_key_ex(Z_ARRVAL_P(send_headers), &header_key, &header_keylen, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(Z_ARRVAL_P(send_headers), (void**)&header_val) == FAILURE){
continue;
}
smart_str_appendl(&impl_headers, header_key, header_keylen);
smart_str_appendl(&impl_headers, ": ", 2);
smart_str_appendl(&impl_headers, Z_STRVAL_PP(header_val), Z_STRLEN_PP(header_val));
http_headers = curl_slist_append(http_headers, impl_headers.c);
}
}
curl_easy_setopt(curl_handler, CURLOPT_HTTPHEADER, http_headers);
curl_easy_perform(curl_handler);
curl_slist_free_all(http_headers);
curl_formfree(post);
curl_easy_cleanup(curl_handler);
curl_global_cleanup();
smart_str_0(&body_str);
smart_str_0(&header_str);
zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), header_str.c, header_str.len TSRMLS_CC);
zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), body_str.c, body_str.len TSRMLS_CC);
}
ZEND_METHOD(fetch_url, httpCode){
pcre_cache_entry *pce;
zval *headers;
zval *result_match, *match_long, **http_code;
char *regex = estrdup("/^HTTP/1.1s(.*)sOK/");
if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
RETURN_FALSE;
}
MAKE_STD_ZVAL(result_match);
MAKE_STD_ZVAL(match_long);
headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);
php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 0, 0, 0, 0 TSRMLS_CC);
if(Z_LVAL_P(match_long) 0){
if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&http_code) == FAILURE){
RETURN_FALSE;
}else{
RETURN_STRINGL(Z_STRVAL_PP(http_code), Z_STRLEN_PP(http_code), 0);
}
}else{
RETURN_FALSE;
}
}
ZEND_METHOD(fetch_url, responseCookies){
pcre_cache_entry *pce;
zval *headers, *result_match, *match_long;
char *regex = estrdup("/Set-Cookie:s(.*?);/");
headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);
array_init(return_value);
if(Z_STRLEN_P(headers) 0){
if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
RETURN_NULL();
}
MAKE_STD_ZVAL(result_match);
MAKE_STD_ZVAL(match_long);
//void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 1, 0, 0, 0 TSRMLS_CC);
if(Z_LVAL_P(match_long) 0){
zval **result;
HashTable *result_ht;
char *found = NULL;
long found_offset;
char *cookie_name;
char *cookie_value;
if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&result) != FAILURE){
result_ht = Z_ARRVAL_PP(result);
for(zend_hash_internal_pointer_reset(result_ht);
zend_hash_has_more_elements(result_ht) == SUCCESS;
zend_hash_move_forward(result_ht)){
zval **tmpzval;
if(zend_hash_get_current_data(result_ht, (void**)&tmpzval) == FAILURE){
continue;
}
found = php_memnstr(Z_STRVAL_PP(tmpzval), "=", 1, Z_STRVAL_PP(tmpzval) + Z_STRLEN_PP(tmpzval));
found_offset = found - Z_STRVAL_PP(tmpzval);
cookie_name = estrndup(Z_STRVAL_PP(tmpzval), found_offset);
cookie_value= estrndup(found+1, strlen(found)-1);
add_assoc_stringl(return_value, cookie_name, cookie_value, strlen(cookie_value), 1);
efree(cookie_name);
efree(cookie_value);
}
}
}
}else{
RETURN_NULL();
}
}
ZEND_METHOD(fetch_url, responseHeaders){
zval *headers, *delim;
uint idx;
headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);
MAKE_STD_ZVAL(delim);
array_init(return_value);
ZVAL_STRING(delim, "rn", 1);
php_explode(delim, headers, return_value, LONG_MAX);
idx = zend_hash_num_elements(Z_ARRVAL_P(return_value));
zend_hash_index_del(Z_ARRVAL_P(return_value), idx-1);
zend_hash_index_del(Z_ARRVAL_P(return_value), idx-2);
zval_dtor(delim);
}
ZEND_METHOD(fetch_url, setAllowRedirect){
zval *allow;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &allow) == FAILURE){
RETURN_FALSE;
}
convert_to_long(allow);
zend_update_property_long(FETCH_THIS, ZEND_STRL("allow_redirect"), Z_LVAL_P(allow) TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setConnectTimeout){
zval *connect_timeout;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &connect_timeout) == FAILURE){
RETURN_FALSE;
}
convert_to_long(connect_timeout);
zend_update_property_long(FETCH_THIS, ZEND_STRL("connect_timeout"), Z_LVAL_P(connect_timeout) TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setCookie){
zval *zval_cookies;
zval *cookie_name, *cookie_value;
smart_str impl_cookies = {0};
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &cookie_name, &cookie_value) == FAILURE){
RETURN_FALSE;
}
if(Z_TYPE_P(cookie_name) != IS_STRING || Z_TYPE_P(cookie_value) != IS_STRING){
RETURN_FALSE;
}
zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));
smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_name), Z_STRLEN_P(cookie_name));
smart_str_appendc(&impl_cookies, '=');
smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_value), Z_STRLEN_P(cookie_value));
smart_str_appendc(&impl_cookies, ';');
smart_str_0(&impl_cookies);
zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setCookies){
zval *zval_cookies;
zval *cookie_array;
smart_str impl_cookies = {0};
HashTable *cookies_ht;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &cookie_array) == FAILURE){
RETURN_FALSE;
}
if(Z_TYPE_P(cookie_array) != IS_ARRAY){
RETURN_FALSE;
}
zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
cookies_ht = Z_ARRVAL_P(cookie_array);
smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));
for(zend_hash_internal_pointer_reset(cookies_ht);
zend_hash_has_more_elements(cookies_ht) == SUCCESS;
zend_hash_move_forward(cookies_ht))
{
zval **value;
char *key;
uint key_len;
ulong idx;
if(zend_hash_get_current_key_ex(cookies_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(cookies_ht, (void**)&value) == FAILURE){
continue;
}
convert_to_string(*value);
if(Z_TYPE_PP(value) != IS_STRING){
continue;
}
smart_str_appendl(&impl_cookies, key, key_len-1);
smart_str_appendl(&impl_cookies, "=", 1);
smart_str_appendl(&impl_cookies, Z_STRVAL_PP(value), Z_STRLEN_PP(value));
smart_str_appendl(&impl_cookies, ";", 1);
}
php_url_decode(impl_cookies.c, impl_cookies.len);
smart_str_0(&impl_cookies);
zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setHeader){
zval *headers, *value=NULL, *send_headers;
HashTable *headers_ht;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &headers) == FAILURE){
RETURN_FALSE;
}
send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);
if(Z_TYPE_P(send_headers) == IS_NULL){
MAKE_STD_ZVAL(send_headers);
array_init(send_headers);
}
headers_ht = Z_ARRVAL_P(send_headers);
if(Z_TYPE_P(headers) == IS_ARRAY){
for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(headers));
zend_hash_has_more_elements(Z_ARRVAL_P(headers)) == SUCCESS;
zend_hash_move_forward(Z_ARRVAL_P(headers))){
char* key;
uint key_len;
ulong idx;
zval **tmpzval;
if(zend_hash_get_current_key_ex(Z_ARRVAL_P(headers), &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(Z_ARRVAL_P(headers), (void**)&tmpzval) == FAILURE){
continue;
}
add_assoc_stringl(send_headers, key, Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), 1);
}
}else if(Z_TYPE_P(headers) == IS_STRING && Z_TYPE_P(value) == IS_STRING){
add_assoc_stringl(send_headers, Z_STRVAL_P(headers), Z_STRVAL_P(value), Z_STRLEN_P(value), 1);
}else{
zend_error(E_WARNING, "param error.");
}
zend_update_property(FETCH_THIS, ZEND_STRL("send_headers"), send_headers);
}
/*
 * FetchUrl::setMethod(string method)
 *
 * Stores "get" or "post" in the "method" property; the argument is compared
 * case-insensitively and is not modified.
 *
 * Returns TRUE on success. Returns FALSE when the argument is not a string,
 * or - after recording error 404 "Not support method." on the object - when
 * the method is neither GET nor POST.
 */
ZEND_METHOD(fetch_url, setMethod)
{
	zval *zval_method;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &zval_method) == FAILURE) {
		RETURN_FALSE;
	}
	if (Z_TYPE_P(zval_method) != IS_STRING) {
		RETURN_FALSE;
	}

	/* Compare without lower-casing: php_strtolower() rewrites the buffer in
	 * place, which would alter the caller's string. */
	if (strcasecmp(Z_STRVAL_P(zval_method), "get") == 0) {
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
	} else if (strcasecmp(Z_STRVAL_P(zval_method), "post") == 0) {
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("post") TSRMLS_CC);
	} else {
		FETCH_ERROR("Not support method.", 404);
		RETURN_FALSE;
	}
	RETURN_TRUE;
}
ZEND_METHOD(fetch_url, setPostData){
zval *data, *post_data;
HashTable *post_data_ht;
smart_str temp = {0};
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &post_data) == FAILURE){
RETURN_FALSE;
}
data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);
if(Z_TYPE_P(post_data) != IS_ARRAY){
zend_error(E_WARNING, "post data must be array.");
RETURN_FALSE;
}
post_data_ht = Z_ARRVAL_P(post_data);
smart_str_appendl(&temp, Z_STRVAL_P(data), Z_STRLEN_P(data));
for(zend_hash_internal_pointer_reset(post_data_ht);
zend_hash_has_more_elements(post_data_ht) == SUCCESS;
zend_hash_move_forward(post_data_ht)){
zval **current_data;
char *key;
uint key_len;
ulong idx;
if(zend_hash_get_current_key_ex(post_data_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
continue;
}
if(zend_hash_get_current_data(post_data_ht, (void**)¤t_data) == FAILURE){
continue;
}
convert_to_string(*current_data);
smart_str_appendl(&temp, key, key_len-1);
smart_str_appendc(&temp, '=');
smart_str_appendl(&temp, Z_STRVAL_PP(current_data), Z_STRLEN_PP(current_data));
smart_str_appendc(&temp, '&');
}
smart_str_0(&temp);
zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), temp.c, temp.len TSRMLS_CC);
}
ZEND_METHOD(fetch_url, setReadTimeout){
zval *read_timeout;
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &read_timeout) == FAILURE){
RETURN_FALSE;
}
if(Z_TYPE_P(read_timeout) != IS_LONG){
zend_error(E_WARNING, "readtimeout must be integer.");
RETURN_FALSE;
}
zend_update_property_long(FETCH_THIS, ZEND_STRL("read_timeout"), Z_LVAL_P(read_timeout) TSRMLS_CC);
}
/* FetchUrl::__destruct() - intentionally empty; the destructor performs no
 * work of its own. */
ZEND_METHOD(fetch_url, __destruct)
{
}
/* Method table of the FetchUrl class: one entry per method, giving its
 * implementation, its argument description and its access flags. */
static zend_function_entry fetch_url_method[] = {
	/* construction */
	ZEND_ME(fetch_url, __construct,       void_arginfo,              ZEND_ACC_CTOR|ZEND_ACC_PUBLIC)

	/* uploads */
	ZEND_ME(fetch_url, setBinary,         setBinary_arginfo,         ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setFile,           setFile_arginfo,           ZEND_ACC_PUBLIC)

	/* state, errors and the request itself */
	ZEND_ME(fetch_url, body,              void_arginfo,              ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, clean,             void_arginfo,              ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, errmsg,            void_arginfo,              ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, errcode,           void_arginfo,              ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, fetch,             fetch_arginfo,             ZEND_ACC_PUBLIC)

	/* response accessors */
	ZEND_ME(fetch_url, httpCode,          void_arginfo,              ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, responseCookies,   responseCookies_arginfo,   ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, responseHeaders,   responseHeaders_arginfo,   ZEND_ACC_PUBLIC)

	/* request configuration */
	ZEND_ME(fetch_url, setAllowRedirect,  setAllowRedirect_arginfo,  ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setConnectTimeout, setConnectTimeout_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setCookie,         setCookie_arginfo,         ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setCookies,        setCookies_arginfo,        ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setHeader,         setHeader_arginfo,         ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setMethod,         setMethod_arginfo,         ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setPostData,       setPostData,               ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setReadTimeout,    setReadTimeout_arginfo,    ZEND_ACC_PUBLIC)

	/* destruction */
	ZEND_ME(fetch_url, __destruct,        void_arginfo,              ZEND_ACC_DTOR|ZEND_ACC_PUBLIC)

	PHP_FE_END /* table terminator */
};
/* If you declare any globals in php_fetch_url.h uncomment this:
ZEND_DECLARE_MODULE_GLOBALS(fetch_url)
*/

/* True global resources - no need for thread safety here */
static int le_fetch_url;

/* {{{ fetch_url_functions[]
 *
 * Every user visible function must have an entry in fetch_url_functions[].
 * The extension exports no plain functions (only the FetchUrl class), so the
 * table holds just its terminator.
 */
const zend_function_entry fetch_url_functions[] = {
	PHP_FE_END /* Must be the last line in fetch_url_functions[] */
};
/* }}} */

/* {{{ fetch_url_module_entry
 * Module descriptor: name, function table, lifecycle callbacks and version.
 */
zend_module_entry fetch_url_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901
	STANDARD_MODULE_HEADER,
#endif
	"fetch_url",
	fetch_url_functions,
	PHP_MINIT(fetch_url),
	PHP_MSHUTDOWN(fetch_url),
	PHP_RINIT(fetch_url), /* Replace with NULL if there's nothing to do at request start */
	PHP_RSHUTDOWN(fetch_url), /* Replace with NULL if there's nothing to do at request end */
	PHP_MINFO(fetch_url),
#if ZEND_MODULE_API_NO >= 20010901
	"0.1", /* Replace with version number for your extension */
#endif
	STANDARD_MODULE_PROPERTIES
};
/* }}} */

#ifdef COMPILE_DL_FETCH_URL
ZEND_GET_MODULE(fetch_url)
#endif
/* {{{ PHP_INI
*/
/* Remove comments and fill if you need to have entries in php.ini
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("fetch_url.global_value", "42", PHP_INI_ALL, OnUpdateLong, global_value, zend_fetch_url_globals, fetch_url_globals)
STD_PHP_INI_ENTRY("fetch_url.global_string", "foobar", PHP_INI_ALL, OnUpdateString, global_string, zend_fetch_url_globals, fetch_url_globals)
PHP_INI_END()
*/
/* }}} */
/* {{{ php_fetch_url_init_globals
*/
/* Uncomment this function if you have INI entries
static void php_fetch_url_init_globals(zend_fetch_url_globals *fetch_url_globals)
{
fetch_url_globals->global_value = 0;
fetch_url_globals->global_string = NULL;
}
*/
/* }}} */
/* {{{ PHP_MINIT_FUNCTION
*/
/*
 * Module startup: registers the FetchUrl class and declares its protected
 * properties together with their default values.
 */
PHP_MINIT_FUNCTION(fetch_url)
{
	/* If you have INI entries, uncomment these lines
	REGISTER_INI_ENTRIES();
	*/
	zend_class_entry fetch_ce;

	INIT_CLASS_ENTRY(fetch_ce, FETCH_CLASS_NAME, fetch_url_method);
	g_fetch_ce = zend_register_internal_class(&fetch_ce TSRMLS_CC);

	/* Response state. */
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("body"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("headers"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("httpCode"), ZEND_ACC_PROTECTED TSRMLS_CC);

	/* Error state. "errmsg" defaults to an empty string - the same value
	 * clean() resets it to - because errmsg() reads it as a string; a NULL
	 * default made that read invalid on a fresh object. */
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("errmsg"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("errno"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);

	/* Request configuration. */
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("cookies"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("send_headers"), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("allow_redirect"), 1, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("connect_timeout"), 5, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("read_timeout"), 60, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("method"), ZEND_STRL("get"), ZEND_ACC_PROTECTED TSRMLS_CC);
	/* NOTE(review): "multilpart" looks like a misspelling of "multipart";
	 * the name is kept as declared - confirm nothing relies on it before
	 * renaming. */
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("multilpart"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);

	/* Request payload. */
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("data"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("binary_data"), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("upload_filepaths"), ZEND_ACC_PROTECTED TSRMLS_CC);

	return SUCCESS;
}
/* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION
*/
/* Module shutdown hook: does nothing and reports success. */
PHP_MSHUTDOWN_FUNCTION(fetch_url)
{
	/* uncomment this line if you have INI entries
	UNREGISTER_INI_ENTRIES();
	*/

	return SUCCESS;
}
/* }}} */
/* Remove if there's nothing to do at request start */
/* {{{ PHP_RINIT_FUNCTION
*/
/* Request startup hook: does nothing and reports success. */
PHP_RINIT_FUNCTION(fetch_url)
{
	return SUCCESS;
}
/* }}} */
/* Remove if there's nothing to do at request end */
/* {{{ PHP_RSHUTDOWN_FUNCTION
*/
/* Request shutdown hook: does nothing and reports success. */
PHP_RSHUTDOWN_FUNCTION(fetch_url)
{
	return SUCCESS;
}
/* }}} */
/* {{{ PHP_MINFO_FUNCTION
*/
/* Module info hook: prints a one-row table announcing that fetch_url
 * support is enabled. */
PHP_MINFO_FUNCTION(fetch_url)
{
	php_info_print_table_start();
	php_info_print_table_header(2, "fetch_url support", "enabled");
	php_info_print_table_end();

	/* Remove comments if you have entries in php.ini
	DISPLAY_INI_ENTRIES();
	*/
}
/* }}} */
源码下载地址:http://l9.yunpan.cn/lk/QEcqErTnKnHIy
来源:http://www.tulaoshi.com/n/20160219/1593220.html
看过《用C实现PHP扩展 Fetch_Url 类数据抓取的方法》的人还看了以下文章 更多>>