Skip to content

Here's a short Rust program, built on the microformats crate, that verifies a webmention: it checks whether a given page actually mentions a given link, properly resolving all URLs and even scanning the HTML content in entry["properties"]["content"].

use std::cell::{RefCell, Ref};
use std::rc::Rc;

use clap::Parser;
use microformats::types::PropertyValue;
use microformats::html5ever;
use microformats::html5ever::tendril::TendrilSink;

/// Error type returned by `check_mention` and `main`.
///
/// Every variant wraps an error from an underlying library. The `#[from]`
/// attributes make each wrapped error convertible into this type, which is
/// what allows `?` to be applied to those library results directly.
#[derive(thiserror::Error, Debug)]
enum Error {
    /// An error reported by `reqwest`.
    #[error("http request error: {0}")]
    Http(#[from] reqwest::Error),
    /// An error reported by the `microformats` crate.
    #[error("microformats error: {0}")]
    Microformats(#[from] microformats::Error),
    /// An error reported by `serde_json`.
    // NOTE(review): nothing in the visible code produces this variant — confirm it is still needed.
    #[error("json error: {0}")]
    Json(#[from] serde_json::Error),
    /// A URL that could not be parsed.
    // NOTE(review): nothing in the visible code produces this variant — confirm it is still needed.
    #[error("url parse error: {0}")]
    UrlParse(#[from] url::ParseError),
}

/// The kind of interaction a page expresses towards a link.
///
/// The `Debug` representation of a variant is what the program prints on
/// success, so variant names are part of the command-line output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum MentionType {
    /// The link is a value of the item's `in-reply-to` property.
    Reply,
    /// The link is a value of the item's `like-of` property.
    Like,
    /// The link is a value of the item's `repost-of` property.
    Repost,
    /// The link is a value of the item's `bookmark-of` property.
    Bookmark,
    /// The link is the target of an `<a href>` inside the item's `content` HTML.
    Mention,
}

/// Reports whether the HTML in `html` contains an `<a href="...">` that points at `link`.
///
/// Every `href` value is resolved against `base_url` before it is compared
/// with `link`. Values that cannot be resolved are skipped, not treated as errors.
fn html_links_to(html: &str, base_url: &url::Url, link: &url::Url) -> bool {
    let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
        .from_utf8()
        .one(html.as_bytes())
        .document;

    // Walk the tree with an explicit work list instead of recursion. Visit
    // order does not matter: the answer is only "does any matching link exist".
    let mut pending: Vec<Rc<html5ever::rcdom::Node>> =
        root.children.borrow().iter().cloned().collect();
    while let Some(node) = pending.pop() {
        // Queue the children first, so they get visited whatever this node turns out to be.
        pending.extend(node.children.borrow().iter().cloned());

        if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
            // Only `<a>` elements are of interest.
            if name.local != *"a" {
                continue;
            }
            // Be forgiving in parsing URLs: an unresolvable `href` is simply ignored.
            let points_at_link = attrs
                .borrow()
                .iter()
                .filter(|attr| attr.name.local == *"href")
                .filter_map(|attr| base_url.join(attr.value.as_ref()).ok())
                .any(|url| &url == link);
            if points_at_link {
                return true;
            }
        }
    }

    false
}

/// Checks whether `document` (HTML fetched from `base_url`) mentions `link`.
///
/// The HTML is parsed for microformats2 markup, and the items in the parsed
/// document's `items` list are examined in order. For each item:
///
/// 1. the `in-reply-to`, `like-of`, `bookmark-of` and `repost-of` properties
///    are checked, in that order, for a URL value equal to `link`;
/// 2. failing that, if the first value of the `content` property is an HTML
///    fragment, that fragment is scanned for an `<a href>` resolving to `link`.
///
/// # Returns
///
/// `Ok(Some(kind))` for the first match found, `Ok(None)` when nothing matches.
///
/// # Errors
///
/// Returns an error if the microformats parser rejects the document.
fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
    // First, check the document for MF2 markup
    let document = microformats::from_html(document.as_ref(), base_url.clone())?;

    // Get an iterator of all items
    let items_iter = document.items.iter()
        .map(AsRef::as_ref)
        .map(RefCell::borrow);

    for item in items_iter {
        let props = item.properties.borrow();

        // Explicit interaction properties take precedence over a plain link in the content.
        for (prop, interaction_type) in [
            ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
            ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
        ] {
            let mentioned = props.get(prop).map_or(false, |propvals| {
                propvals
                    .iter()
                    .any(|val| matches!(val, PropertyValue::Url(url) if url == link))
            });
            if mentioned {
                return Ok(Some(interaction_type));
            }
        }

        // Process `content`: only the first value is inspected, and only when
        // it is an HTML fragment.
        if let Some(PropertyValue::Fragment(content)) =
            props.get("content").and_then(|values| values.first())
        {
            if html_links_to(&content.html, base_url, link) {
                return Ok(Some(MentionType::Mention));
            }
        }
    }

    Ok(None)
}

// Command-line arguments, parsed by clap's derive API.
//
// Plain `//` comments are used here on purpose: clap's derive macro turns
// `///` doc comments into user-visible help text, which would change the
// program's output.
#[derive(Parser, Debug)]
#[clap(
    name = "kittybox-check-webmention",
    author = "Vika <vika@fireburn.ru>",
    version = env!("CARGO_PKG_VERSION"),
    about = "Verify an incoming webmention"
)]
struct Args {
    // The page to fetch; `main` also passes it as the base URL for resolving links.
    #[clap(value_parser)]
    url: url::Url,
    // The link that the fetched page is expected to mention.
    #[clap(value_parser)]
    link: url::Url
}

/// Fetches the page given on the command line and checks whether it mentions the given link.
///
/// When a mention is found, its type is printed to stdout and the program
/// succeeds. When none is found, the process exits with status 1 and prints
/// nothing.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be built, the request or body
/// download fails, or `check_mention` fails.
#[tokio::main]
async fn main() -> Result<(), self::Error> {
    let args = Args::parse();

    // Identify ourselves as `<package name>/<package version>`. A failure to
    // build the client is reported like any other HTTP error, not as a panic.
    let http: reqwest::Client = reqwest::Client::builder()
        .user_agent(concat!(
            env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
        ))
        .build()?;

    let response = http.get(args.url.clone()).send().await?;
    let text = response.text().await?;

    match check_mention(text, &args.url, &args.link)? {
        Some(mention_type) => {
            println!("{:?}", mention_type);

            Ok(())
        }
        // No mention: signal it through the exit status alone.
        None => std::process::exit(1),
    }
}