Here's a short Rust program using the microformats
crate that checks the presence of a webmention on a certain page, properly resolving all URLs and even scanning HTML content in entry["properties"]["content"]
.
use std::cell::{RefCell, Ref};
use std::rc::Rc;
use clap::Parser;
use microformats::types::PropertyValue;
use microformats::html5ever;
use microformats::html5ever::tendril::TendrilSink;
#[derive(thiserror::Error, Debug)]
enum Error {
#[error("http request error: {0}")]
Http(#[from] reqwest::Error),
#[error("microformats error: {0}")]
Microformats(#[from] microformats::Error),
#[error("json error: {0}")]
Json(#[from] serde_json::Error),
#[error("url parse error: {0}")]
UrlParse(#[from] url::ParseError),
}
#[derive(Debug)]
enum MentionType {
Reply,
Like,
Repost,
Bookmark,
Mention
}
fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
// First, check the document for MF2 markup
let document = microformats::from_html(document.as_ref(), base_url.clone())?;
// Get an iterator of all items
let items_iter = document.items.iter()
.map(AsRef::as_ref)
.map(RefCell::borrow);
for item in items_iter {
let props = item.properties.borrow();
for (prop, interaction_type) in [
("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
] {
if let Some(propvals) = props.get(prop) {
for val in propvals {
if let PropertyValue::Url(url) = val {
if url == link {
return Ok(Some(interaction_type))
}
}
}
}
}
// Process `content`
if let Some(PropertyValue::Fragment(content)) = props.get("content")
.map(Vec::as_slice)
.unwrap_or_default()
.first()
{
let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
.from_utf8()
.one(content.html.to_owned().as_bytes())
.document;
// This is a trick to unwrap recursion into a loop
//
// A list of unprocessed node is made. Then, in each
// iteration, the list is "taken" and replaced with an
// empty list, which is populated with nodes for the next
// iteration of the loop.
//
// Empty list means all nodes were processed.
let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect();
while unprocessed_nodes.len() > 0 {
// "Take" the list out of its memory slot, replace it with an empty list
let nodes = std::mem::take(&mut unprocessed_nodes);
for node in nodes.into_iter() {
// Add children nodes to the list for the next iteration
unprocessed_nodes.extend(node.children.borrow().iter().cloned());
if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
// If it's not `<a>`, skip it
if name.local != *"a" { continue; }
for attr in attrs.borrow().iter() {
// if it's not `<a href="...">`, skip it
if attr.name.local != *"href" { continue; }
// Be forgiving in parsing URLs, and resolve them against the base URL
if let Ok(url) = base_url.join(attr.value.as_ref()) {
if &url == link {
return Ok(Some(MentionType::Mention));
}
}
}
}
}
}
}
}
Ok(None)
}
#[derive(Parser, Debug)]
#[clap(
name = "kittybox-check-webmention",
author = "Vika <vika@fireburn.ru>",
version = env!("CARGO_PKG_VERSION"),
about = "Verify an incoming webmention"
)]
struct Args {
#[clap(value_parser)]
url: url::Url,
#[clap(value_parser)]
link: url::Url
}
#[tokio::main]
async fn main() -> Result<(), self::Error> {
let args = Args::parse();
let http: reqwest::Client = {
#[allow(unused_mut)]
let mut builder = reqwest::Client::builder()
.user_agent(concat!(
env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
));
builder.build().unwrap()
};
let response = http.get(args.url.clone()).send().await?;
let text = response.text().await?;
if let Some(mention_type) = check_mention(text, &args.url, &args.link)? {
println!("{:?}", mention_type);
Ok(())
} else {
std::process::exit(1)
}
}