Skip to content

Persist crawled links to a database

composer require innmind/html:~6.3
use Innmind\Http\{
    Request,
    Method,
    ProtocolVersion,
};
use Innmind\Html\{
    Reader\Reader,
    Visitor\Elements,
    Element\A,
};
use Innmind\Url\Url;
use Innmind\Immutable\Predicate\Instance;
use Formal\AccessLayer\{
    Query\Insert,
    Table\Name,
    Row,
};

$read = Reader::default();
$sql = $os
    ->remote()
    ->sql(Url::of('mysql://127.0.0.1:3306/database_name'));

$_ = $os
    ->remote()
    ->http()(Request::of(
        Url::of('https://some-server.com/page.html')
        Method::get,
        ProtocolVersion::v11,
    ))
    ->maybe()
    ->map(static fn($success) => $success->response()->body())
    ->flatMap($read)
    ->toSequence()
    ->toSet()
    ->flatMap(Elements::of('a'))
    ->keep(Instance::of(A::class))
    ->map(static fn(A $a) => $a->href()->toString())
    ->foreach(static fn(string $href) => $sql(Insert::into(
        Name::of('table_name'),
        Row::of(['column_name' => $href]),
    )));