56 行
1.9 KiB
C++
56 行
1.9 KiB
C++
#include <catch2/catch_test_macros.hpp>
|
|
|
|
#include "csp/app_state.h"
|
|
#include "csp/services/crawler_service.h"
|
|
|
|
// Walks a single crawler target through its whole life as seen through
// CrawlerService: upsert (with URL normalization and de-duplication),
// listing, claiming from the queue, the generated/testing/run/active
// steps, and finally being re-queued once it is due again.
TEST_CASE("crawler target upsert and queue lifecycle") {
  using csp::services::CrawlerService;
  using csp::services::CrawlerTarget;

  // NOTE(review): ":memory:" is presumably an in-memory database so the
  // test leaves nothing on disk -- confirm against AppState::Init.
  csp::AppState::Instance().Init(":memory:");
  CrawlerService svc(csp::AppState::Instance().db());

  // First upsert creates the row; the stored URL is expected to have a
  // lower-cased host and no trailing slash or query string.
  const auto created =
      svc.UpsertTarget("https://Example.com/news/?a=1", "test", "u1", "tester");
  REQUIRE(created.inserted);
  REQUIRE(created.target.id > 0);
  REQUIRE(created.target.normalized_url == "https://example.com/news");

  // Upserting the already-normalized form must resolve to the same row
  // instead of inserting a duplicate.
  const auto repeated =
      svc.UpsertTarget("https://example.com/news", "test", "u1", "tester");
  REQUIRE_FALSE(repeated.inserted);
  REQUIRE(repeated.target.id == created.target.id);

  // Exactly one target exists after the two upserts.
  auto all_targets = svc.ListTargets("", 50);
  REQUIRE(all_targets.size() == 1);

  // Claiming hands back that target, now in the "generating" status.
  CrawlerTarget picked;
  REQUIRE(svc.ClaimNextTarget(picked));
  REQUIRE(picked.id == created.target.id);
  REQUIRE(picked.status == "generating");

  // Drive the target through the remaining steps. The order of these
  // calls is the lifecycle under test, so it must not be rearranged.
  svc.UpdateGenerated(picked.id, "{}", "/tmp/demo.py");
  svc.MarkTesting(picked.id);
  svc.InsertRun(picked.id, "success", 200, "{}", "");
  svc.MarkActive(picked.id, 1700000000);

  const auto reloaded = svc.GetTargetById(picked.id);
  REQUIRE(reloaded.has_value());
  REQUIRE(reloaded->status == "active");

  // NOTE(review): the arguments look like (interval, now, out) -- confirm
  // against the service header. With the values used here the target is
  // not picked up at 1700002000 but is picked up at 1700004000.
  CrawlerTarget requeued;
  REQUIRE_FALSE(svc.EnqueueDueActiveTarget(3600, 1700002000, requeued));
  REQUIRE(svc.EnqueueDueActiveTarget(3600, 1700004000, requeued));
  REQUIRE(requeued.id == picked.id);
  REQUIRE(requeued.status == "queued");

  // The single run recorded above is still listed for the target.
  const auto history = svc.ListRuns(picked.id, 20);
  REQUIRE(history.size() == 1);
  REQUIRE(history[0].status == "success");
}
|
|
|
|
// Feeds ExtractUrls a sentence mixing CJK text with two URLs, one with
// an explicit scheme and one starting with a bare "www." host.
TEST_CASE("crawler extract urls from mixed text") {
  using csp::services::CrawlerService;

  const auto found = CrawlerService::ExtractUrls(
      "请收录 https://one.hao.work/path/?a=1 和 www.Example.com/docs, 谢谢");

  // Both URLs are expected back, in order of appearance.
  REQUIRE(found.size() == 2);

  // Expected without the trailing slash and query string.
  REQUIRE(found[0] == "https://one.hao.work/path");

  // Expected with an "https://" scheme, a lower-cased host, and without
  // the trailing comma from the surrounding text.
  REQUIRE(found[1] == "https://www.example.com/docs");
}
|