add url check.
Some checks failed
Deploy / Build (push) Failing after 2m54s

This commit is contained in:
2025-06-29 16:25:27 +08:00
parent f5c76b931f
commit f6d37431f8
6 changed files with 103 additions and 5 deletions

View File

@ -17,6 +17,9 @@
#include <iostream>
bool isUrlValid(boost::asio::io_context &ioContext, const std::string &host, const std::string &port, const std::string &target);
bool isPathValid(const std::string &docRoot, const std::string &target);
// ./UrlCheck -d ./database.sqlite -r ./amass_blog --delete-invalid=true --delete="/" --delete="/login" --delete="/MessageBoard" --delete="/我的博客"
int main(int argc, char const *argv[]) {
boost::program_options::options_description description("Allowed options");
@ -24,6 +27,8 @@ int main(int argc, char const *argv[]) {
description.add_options()
("help,h", "produce help message.")
("database,d", boost::program_options::value<std::string>(),"set database path")
("docroot,r", boost::program_options::value<std::string>(),"set docroot path")
("delete", boost::program_options::value<std::vector <std::string>>(),"set docroot path")
("delete-invalid", boost::program_options::value<bool>()->default_value(false),"delete invalid url");
// clang-format on
boost::program_options::variables_map values;
@ -31,13 +36,23 @@ int main(int argc, char const *argv[]) {
boost::program_options::notify(values);
std::string path;
std::string docRoot;
std::vector<std::string> removeItems;
if (values.count("help")) {
std::cout << description << std::endl;
std::exit(0);
} else if (values.count("database")) {
}
if (values.count("database")) {
path = values.at("database").as<std::string>();
}
if (values.count("docroot")) {
docRoot = values.at("docroot").as<std::string>();
}
if (values.count("delete")) {
removeItems = values.at("delete").as<std::vector<std::string>>();
}
if (path.empty()) {
std::cerr << "please specify the database path." << std::endl;
@ -48,6 +63,22 @@ int main(int argc, char const *argv[]) {
return 2;
}
if (docRoot.empty()) {
std::cerr << "please specify the doc root." << std::endl;
std::cout << description << std::endl;
return 1;
} else if (!std::filesystem::exists(docRoot)) {
std::cerr << "doc root " << docRoot << " not existed." << std::endl;
return 2;
}
if (!removeItems.empty()) {
std::cout << "remove:\r " << std::endl;
for (auto &item : removeItems) {
std::cout << item << std::endl;
}
}
Older::Database database;
if (!database.open(path)) {
return 3;
@ -56,10 +87,14 @@ int main(int argc, char const *argv[]) {
boost::asio::io_context ioContext;
auto items = database.visitRecords();
for (auto &item : items) {
bool valid = isUrlValid(ioContext, "amass.fun", "443", item.url);
bool needDelete = std::find(removeItems.cbegin(), removeItems.cend(), item.url) != removeItems.cend();
if (!needDelete) {
needDelete = !isPathValid(docRoot, item.url) && values.at("delete-invalid").as<bool>();
}
// bool valid = isUrlValid(ioContext, "amass.fun", "443", item.url);
std::cout << item.url << std::endl;
std::cout << "valid: " << valid << std::endl;
if (!valid && values.at("delete-invalid").as<bool>()) {
if (needDelete) {
std::cout << "delete: " << database.removeVisitRecord(item.id) << std::endl;
}
std::cout << "----------" << std::endl;
@ -68,6 +103,10 @@ int main(int argc, char const *argv[]) {
return 0;
}
// Reports whether `target`, appended verbatim to `docRoot`, names an existing
// filesystem entry.
//
// docRoot: directory prefix the target is appended to.
// target:  text appended after docRoot; plain string concatenation is used, so
//          no separator is inserted and a leading '/' in target does not replace
//          docRoot (as std::filesystem::path::operator/ would).
//
// Returns true when the concatenated path exists, false when it does not.
// A path the OS rejects as too long cannot name an existing entry, so it is
// reported as false instead of raising. Any other filesystem failure is still
// raised as std::filesystem::filesystem_error, as before.
bool isPathValid(const std::string &docRoot, const std::string &target) {
    const std::filesystem::path candidate{docRoot + target};

    // Use the non-throwing overload so the "name too long" case can be inspected;
    // the throwing overload does not treat it as "not found" and would abort the caller.
    std::error_code error;
    const bool found = std::filesystem::exists(candidate, error);
    if (!error) {
        return found;
    }
    if (error == std::errc::filename_too_long) {
        return false;
    }
    throw std::filesystem::filesystem_error("isPathValid: cannot query path", candidate, error);
}
bool isUrlValid(boost::asio::io_context &ioContext, const std::string &host, const std::string &port, const std::string &target) {
using namespace boost;
using namespace boost::asio;