Files
Older/Tools/UrlCheck.cpp
amass f6d37431f8
Some checks failed
Deploy / Build (push) Failing after 2m54s
add url check.
2025-06-29 16:25:27 +08:00

161 lines
6.0 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "Base/Database.h"
#include <boost/asio/io_context.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <boost/asio/ssl/context.hpp>
#include <boost/asio/ssl/host_name_verification.hpp>
#include <boost/beast/core/flat_buffer.hpp>
#include <boost/beast/core/tcp_stream.hpp>
#include <boost/beast/http/message.hpp>
#include <boost/beast/http/read.hpp>
#include <boost/beast/http/string_body.hpp>
#include <boost/beast/http/write.hpp>
#include <boost/beast/ssl/ssl_stream.hpp>
#include <boost/beast/version.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/variables_map.hpp>
#include <algorithm>
#include <exception>
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>
// Forward declarations; both helpers are defined after main().
// isUrlValid: sends an HTTPS request for `target` to host:port and reports whether the
// response status is in the 2xx/3xx range (false on any exception).
bool isUrlValid(boost::asio::io_context &ioContext, const std::string &host, const std::string &port, const std::string &target);
// isPathValid: reports whether the filesystem path formed by appending `target` to `docRoot` exists.
bool isPathValid(const std::string &docRoot, const std::string &target);
// Example invocation:
// ./UrlCheck -d ./database.sqlite -r ./amass_blog --delete-invalid=true --delete="/" --delete="/login" --delete="/MessageBoard" --delete="/我的博客"
int main(int argc, char const *argv[]) {
boost::program_options::options_description description("Allowed options");
// clang-format off
description.add_options()
("help,h", "produce help message.")
("database,d", boost::program_options::value<std::string>(),"set database path")
("docroot,r", boost::program_options::value<std::string>(),"set docroot path")
("delete", boost::program_options::value<std::vector <std::string>>(),"set docroot path")
("delete-invalid", boost::program_options::value<bool>()->default_value(false),"delete invalid url");
// clang-format on
boost::program_options::variables_map values;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, description), values);
boost::program_options::notify(values);
std::string path;
std::string docRoot;
std::vector<std::string> removeItems;
if (values.count("help")) {
std::cout << description << std::endl;
std::exit(0);
}
if (values.count("database")) {
path = values.at("database").as<std::string>();
}
if (values.count("docroot")) {
docRoot = values.at("docroot").as<std::string>();
}
if (values.count("delete")) {
removeItems = values.at("delete").as<std::vector<std::string>>();
}
if (path.empty()) {
std::cerr << "please specify the database path." << std::endl;
std::cout << description << std::endl;
return 1;
} else if (!std::filesystem::exists(path)) {
std::cerr << "database file " << path << " not existed." << std::endl;
return 2;
}
if (docRoot.empty()) {
std::cerr << "please specify the doc root." << std::endl;
std::cout << description << std::endl;
return 1;
} else if (!std::filesystem::exists(docRoot)) {
std::cerr << "doc root " << docRoot << " not existed." << std::endl;
return 2;
}
if (!removeItems.empty()) {
std::cout << "remove:\r " << std::endl;
for (auto &item : removeItems) {
std::cout << item << std::endl;
}
}
Older::Database database;
if (!database.open(path)) {
return 3;
}
boost::asio::io_context ioContext;
auto items = database.visitRecords();
for (auto &item : items) {
bool needDelete = std::find(removeItems.cbegin(), removeItems.cend(), item.url) != removeItems.cend();
if (!needDelete) {
needDelete = !isPathValid(docRoot, item.url) && values.at("delete-invalid").as<bool>();
}
// bool valid = isUrlValid(ioContext, "amass.fun", "443", item.url);
std::cout << item.url << std::endl;
if (needDelete) {
std::cout << "delete: " << database.removeVisitRecord(item.id) << std::endl;
}
std::cout << "----------" << std::endl;
}
return 0;
}
/// Reports whether the path obtained by appending `target` to `docRoot` exists.
///
/// @param docRoot  directory prefix; `target` is appended verbatim (no separator is inserted).
/// @param target   URL path taken from a visit record, e.g. "/posts/hello".
/// @return true if the combined path exists; false if it does not exist or its
///         status cannot be determined (e.g. the name is too long for the OS).
///
/// Uses the non-throwing overload of std::filesystem::exists so that a single
/// malformed URL cannot abort the whole run with an uncaught filesystem_error.
bool isPathValid(const std::string &docRoot, const std::string &target) {
    std::error_code error;
    const bool found = std::filesystem::exists(docRoot + target, error);
    if (error) {
        // "Not found" is not reported as an error; anything arriving here is a real lookup failure.
        std::cerr << "cannot check path for " << target << ": " << error.message() << std::endl;
    }
    return found;
}
/// Checks whether `target` can be fetched from `host`:`port` over HTTPS.
///
/// @param ioContext  I/O context used for the resolver and the TLS stream.
/// @param host       server name; used for DNS resolution, SNI, certificate
///                   hostname verification and the Host header.
/// @param port       service name or port number, e.g. "443".
/// @param target     request target; an empty string is treated as "/".
/// @return true when the response status is in [200, 400); false otherwise or
///         when any step throws (the error is printed to std::cerr).
///
/// NOTE(review): `target` is sent as-is; non-ASCII paths presumably need
/// percent-encoding by the caller — confirm before enabling this check.
bool isUrlValid(boost::asio::io_context &ioContext, const std::string &host, const std::string &port, const std::string &target) {
    namespace asio = boost::asio;
    namespace beast = boost::beast;
    namespace http = beast::http;
    namespace ssl = asio::ssl;
    using tcp = asio::ip::tcp;

    try {
        // 1. Create the TLS context and require a verified peer certificate.
        ssl::context sslContext(ssl::context::tlsv12_client);
        sslContext.set_default_verify_paths();
        sslContext.set_verify_mode(ssl::verify_peer);

        // 2. Create the resolver and the TLS stream.
        tcp::resolver resolver(ioContext);
        beast::ssl_stream<beast::tcp_stream> stream(ioContext, sslContext);

        // verify_peer alone only validates the chain; also require the certificate to match `host`.
        stream.set_verify_callback(ssl::host_name_verification(host));

        // 3. Set the SNI host name (needed by servers hosting several sites on one address).
        if (!SSL_set_tlsext_host_name(stream.native_handle(), host.c_str())) {
            throw boost::system::system_error(static_cast<int>(::ERR_get_error()), asio::error::get_ssl_category());
        }

        // 4. Resolve the host, connect and perform the TLS handshake.
        auto const endpoints = resolver.resolve(host, port);
        beast::get_lowest_layer(stream).connect(endpoints);
        stream.handshake(ssl::stream_base::client);

        // 5. Build and send a GET request; an empty target would yield a malformed request line.
        http::request<http::string_body> request{http::verb::get, target.empty() ? std::string("/") : target, 11};
        request.set(http::field::host, host);
        request.set(http::field::user_agent, BOOST_BEAST_VERSION_STRING);
        http::write(stream, request);

        // 6. Read the response.
        beast::flat_buffer buffer;
        http::response<http::string_body> response;
        http::read(stream, buffer, response);

        // 7. 2xx and 3xx status codes count as reachable.
        const unsigned status = response.result_int();
        const bool accessible = status >= 200 && status < 400;

        // 8. Close the TLS session. Shutdown errors are deliberately ignored:
        //    the verdict only depends on the response already received.
        beast::error_code shutdownError;
        stream.shutdown(shutdownError);

        return accessible;
    } catch (const std::exception &e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return false;
    }
}