#include <atomic>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <vector>

#include <curl/curl.h>
using namespace std;
// URL that every download request is sent to.
const std::string url{"https://www.dmoe.cc/random.php"};
// Number of image-download threads.
const int _n1{2};
// Number of download attempts made by each thread.
const int _n2{10};
// Running count of downloads; atomic because several threads increment it.
std::atomic<int> _n3{0};
// Applies the request settings shared by every transfer to an easy handle.
//
// curl: easy handle to configure; it is handed straight to curl_easy_setopt.
// url:  address the handle should request.
void set_basic_curl(CURL* curl, const string& url) noexcept
{
    // Root-certificate bundle used to authenticate the server on HTTPS
    // connections. During the TLS handshake libcurl checks that the server
    // certificate's signature chain leads back to one of the CA certificates
    // in this file.
    static const char* const ca_bundle_file = "cacert.pem";

    // Servers commonly refuse requests they recognise as coming from a crawler,
    // so a desktop-browser User-Agent is sent to look like a normal visitor.
    static const char* const browser_user_agent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36";

    // Value sent in the Referer request header.
    static const char* const referer = "https://www.baidu.com";

    // Target address.
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    // Server identity verification.
    curl_easy_setopt(curl, CURLOPT_CAINFO, ca_bundle_file);
    // Request headers.
    curl_easy_setopt(curl, CURLOPT_USERAGENT, browser_user_agent);
    curl_easy_setopt(curl, CURLOPT_REFERER, referer);
    // Speak HTTP/1.1.
    curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
    // Follow redirects: when the server answers with a redirect status such as
    // 301 or 302, a new request is issued to the URL it points at.
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
}
// libcurl write callback that streams one chunk of the response body to a file.
//
// ptr:   start of the received data (not NUL-terminated).
// size:  size of one data element.
// nmemb: number of elements; the chunk holds size * nmemb bytes.
// des:   the std::fstream* that was registered with CURLOPT_WRITEDATA.
//
// Returns size * nmemb when the whole chunk reached the stream. Returns 0 when
// there is no destination stream or the stream reports a failure; libcurl
// treats a return value different from the chunk size as a write error and
// aborts the transfer instead of reporting success for a truncated file.
size_t write_file(char* ptr, size_t size, size_t nmemb, void* des) noexcept
{
    const size_t total = size * nmemb;
    std::fstream* const file = static_cast<std::fstream*>(des);
    if (file == nullptr)
    {
        return 0;
    }
    file->write(ptr, static_cast<std::streamsize>(total));
    // failbit/badbit is set when the stream was already broken or the write
    // could not be completed (closed file, full disk, ...).
    return file->fail() ? 0 : total;
}
//保存图片线程入口函数
void save_img() noexcept
{
//循环爬取
for (int j = 0; j < _n2; ++j)
{
//开始爬取图片
CURL* curl = curl_easy_init();
if (curl)
{
set_basic_curl(curl, url);
string path = "C:\\Users\\laijian\\Desktop\\img\\" + to_string(_n3++) + ".jpg";
fstream file(path, ios::binary | ios::out);
if (file.is_open())
{
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_file);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file);
CURLcode res = curl_easy_perform(curl);
if (res == CURLE_OK)
{
cout << url << " > " << path << " \033[1;32msuccessfully !!!\033[0m" << endl;
}
}
curl_easy_cleanup(curl);
}
}
}
int main()
{
//初始化curl库
curl_global_init(CURL_GLOBAL_DEFAULT);
cout << "\033[31mStart !!!\033[0m" << endl;
vector<thread> threads;
for (int i = 0; i < _n1; ++i)
{
threads.push_back(thread(save_img));
}
for (auto& i : threads)
{
i.join();
}
// 清理libcurl全局环境
curl_global_cleanup();
cout << "\033[31mEnd !!!\033[0m" << endl;
return 0;
}
#include <iostream>
#include <string>
#include <thread>
#include <mutex>
#include <vector>
#include <condition_variable>
#include <cstring>
#include <sys/types.h>
#include <sys/stat.h>
#include <fstream>
#include <random>
#include <curl/curl.h>
#include <atomic>
#include <regex>
using namespace std;
// Running index used by main() to number the saved image files.
int i{0};
// libcurl data callback that appends one received chunk to a std::string.
//
// ptr:  start of the received data (not NUL-terminated).
// size: size of one data element.
// nmeb: number of elements; the chunk holds size * nmeb bytes.
// des:  the std::string* registered as the callback's user data.
//
// Returns size * nmeb when the chunk was appended. Returns 0 when there is no
// destination string or the append failed; libcurl treats a return value
// different from the chunk size as an error and aborts the transfer.
size_t write_str(char* ptr, size_t size, size_t nmeb, void* des)
{
    const size_t total = size * nmeb;
    std::string* const buffer = static_cast<std::string*>(des);
    if (buffer == nullptr)
    {
        return 0;
    }
    try
    {
        buffer->append(ptr, total);
    }
    catch (...)
    {
        // This function is called from libcurl, which is C code: an exception
        // (e.g. std::bad_alloc) must not unwind through its stack frames, so
        // it is turned into the callback's error return instead.
        return 0;
    }
    return total;
}
// libcurl write callback that streams one chunk of the response body to a file.
//
// ptr:  start of the received data (not NUL-terminated).
// size: size of one data element.
// nmeb: number of elements; the chunk holds size * nmeb bytes.
// des:  the std::fstream* that was registered with CURLOPT_WRITEDATA.
//
// Returns size * nmeb when the whole chunk reached the stream. Returns 0 when
// there is no destination stream or the stream reports a failure; libcurl
// treats a return value different from the chunk size as a write error and
// aborts the transfer instead of reporting success for a truncated file.
size_t write_file(char* ptr, size_t size, size_t nmeb, void* des)
{
    const size_t total = size * nmeb;
    std::fstream* const file = static_cast<std::fstream*>(des);
    if (file == nullptr)
    {
        return 0;
    }
    file->write(ptr, static_cast<std::streamsize>(total));
    // failbit/badbit is set when the stream was already broken or the write
    // could not be completed (closed file, full disk, ...).
    return file->fail() ? 0 : total;
}
// Applies the request settings shared by every transfer to an easy handle.
//
// curl: easy handle to configure; it is handed straight to curl_easy_setopt.
// url:  address the handle should request.
void set_basic_curl(CURL* curl, const string& url) noexcept
{
    // String-valued options, applied in the order listed.
    struct StringOption
    {
        CURLoption id;
        const char* value;
    };
    const StringOption string_options[] = {
        // Target address.
        {CURLOPT_URL, url.c_str()},
        // CA certificate bundle used to verify the server on HTTPS connections.
        {CURLOPT_CAINFO, "cacert.pem"},
        // Desktop-browser User-Agent header.
        {CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"},
        // Referer header.
        {CURLOPT_REFERER, "https://www.baidu.com"},
    };
    for (const StringOption& option : string_options)
    {
        curl_easy_setopt(curl, option.id, option.value);
    }

    // Follow HTTP redirects.
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
}
int main()
{
std::cout << "\033[32m";
//瑟图网站url
string url = "https://sex.nyan.xyz/api/v2/?num=10&tag=fate";
curl_global_init(CURL_GLOBAL_DEFAULT);
regex rule("https://sex.nyan.xyz/.*?\.((png)|(jpg))");
smatch results;
CURL* curl = curl_easy_init();
if (curl)
{
string s;
set_basic_curl(curl, url);
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, write_str);
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &s);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_str);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &s);
CURLcode res = curl_easy_perform(curl);
if (res == CURLE_OK)
{
for (sregex_iterator it(s.begin(), s.end(), rule), end; it != end; ++it)
{
string path = "D:\\setu\\" + to_string(i++);
if (it->str()[it->str().size() - 3] == 'p')
{
path += ".png";
}
else
{
path += ".jpg";
}
fstream file(path,ios::binary | ios::out);
if (file.is_open())
{
CURL* curl2 = curl_easy_init();
if (curl2)
{
set_basic_curl(curl2, it->str());
curl_easy_setopt(curl2, CURLOPT_WRITEFUNCTION, write_file);
curl_easy_setopt(curl2, CURLOPT_WRITEDATA, &file);
CURLcode res2 = curl_easy_perform(curl2);
if (res2 == CURLE_OK)
{
cout << it->str() << " > " << path << endl;
}
curl_easy_cleanup(curl2);
}
file.close();
}
}
}
curl_easy_cleanup(curl);
}
curl_global_cleanup();
cout << "\033[0m";
return 0;
}
// Build command:
//   g++ main.cpp -o main -w -std=c++2a -O2 -lpthread -lssl -lcrypto -lcurl
Comments NOTHING