Category Archives: WordPress

WordPress Logo

Prepend a Taxonomy to the WordPress Permalink Structure

I was tasked with creating a WordPress site that displays blog posts for different suburbs. Each of these suburbs is required to have its own homepage, with only relevant posts for the suburb listed.

Isn’t that what WordPress Multisite is for?

Each site in a WordPress Multisite network is managed individually. To send a post to another site in the network, you need to use a plugin like Broadcast. For a small number of sites, this is ok. But what if you have hundreds of sites? The answer is obvious. If we use WordPress Multisite, this is going to become really hard to maintain in the long run.

How does it work?

The WordPress permalink structure and rewrite rules will need to be customised in order to make this work. We want to be able to assign posts to taxonomy terms, and have the post list change based on that value.

A default permalink structure might look like this:

/%year%/%monthnum%/%day%/%postname%/ 

What we need to achieve is:

/%suburb_slug%/%year%/%monthnum%/%day%/%postname%/

The value of %suburb_slug% will be used to limit the posts displayed using the WordPress query.

yoursite.com – displays all posts.
yoursite.com/suburb – displays posts for the suburb.

Just show me the code.

The sample code below can be added to your theme's functions.php file. Please be aware that it will not cover all edge cases that you may run into when creating this type of WordPress website.

Remember, whenever you make a change to the permalink structure, you need to save the Permalinks Settings page to flush the rewrite rules.

/**
 * @package WordPress
 * @subpackage Prepend a Taxonomy to the WordPress Permalink Structure
 * @author That Stevens Guy
 * @phpcs:disable PSR1.Files.SideEffects
 *
 * wp-config.php:
 *      // Add a main suburb for the homepage to the Suburbs taxonomy,
 *      // define the slug chosen here.
 *      define('MAIN_SUBURB_SLUG', 'suburb');
 *      define('DEBUG_REWRITES', false);
 */

/**
 * Wire up everything needed for the suburb-prefixed permalink structure.
 *
 * Hooked on `init`. Installs the custom rewrite rules, then registers the
 * request/query filters and the link filters that swap the %suburb_slug%
 * tag for a real slug.
 * Note: Save the Permalinks settings page to flush rewrites.
 *
 * @return void
 */
add_action('init', 'tsg_prepare_permalink');
function tsg_prepare_permalink(): void
{
    // Optional on-page rewrite debugger.
    add_action('parse_request', 'tsg_debug_rewrites');

    tsg_add_rewrites();

    // Rewrite rule and request handling.
    add_filter('rewrite_rules_array', 'tsg_customise_rewrite_rules_array');
    add_filter('request', 'tsg_customise_request_query_vars');
    add_action('template_redirect', 'tsg_verify_query_vars');
    add_filter('query_vars', 'tsg_add_query_var');
    add_filter('available_permalink_structure_tags', 'tsg_add_permalink_structure_tag');

    // Page links go through their own callback.
    add_filter('page_link', 'tsg_customise_page_link');

    // Every other generated link only needs the %suburb_slug% tag replaced.
    $link_hooks = [
        'post_link',
        'post_comments_feed_link',
        'post_type_link',
        'post_type_archive_link',
        'term_link',
        'author_link',
        'day_link',
        'month_link',
        'year_link',
        'feed_link',
    ];
    foreach ($link_hooks as $link_hook) {
        add_filter($link_hook, 'tsg_customise_link');
    }

    // Attachment links are blanked out.
    add_filter('attachment_link', '__return_empty_string');
}

/**
 * Add rewrite rules for the query variable %suburb_slug%.
 *
 * Prefixes the post permalink structure, the category/tag/author/comments
 * bases and the date/page structures with %suburb_slug%, registers the tag
 * itself, and then adds explicit rewrite rules: root-level rules at the top
 * of the rule list and a handful of suburb-prefixed rules at the bottom.
 *
 * The order of the add_rewrite_rule() calls is significant and must be kept.
 *
 * @return void
 */
function tsg_add_rewrites(): void
{
    global $wp_rewrite;

    // Prefix every built-in structure/base with the suburb tag.
    $wp_rewrite->set_permalink_structure('/%suburb_slug%/%year%/%monthnum%/%day%/%postname%/');
    $wp_rewrite->set_category_base('%suburb_slug%/category');
    $wp_rewrite->set_tag_base('%suburb_slug%/tag');
    $wp_rewrite->author_base = '%suburb_slug%/' . $wp_rewrite->author_base;
    $wp_rewrite->comments_base = '%suburb_slug%/' . $wp_rewrite->comments_base;
    $wp_rewrite->date_structure = '%suburb_slug%/%year%/%monthnum%/%day%';
    $wp_rewrite->page_structure = '%suburb_slug%/%pagename%';

    // %suburb_slug% matches one path segment and feeds the suburb_slug query var.
    add_rewrite_tag('%suburb_slug%', '([^/]+)', 'suburb_slug=');

    // Feed patterns with and without the feed base prefix.
    $feedregex = tsg_get_feedregex();
    $feedregex2 = tsg_get_feedregex(2);

    // Fix root rules.
    add_rewrite_rule('^sitemap\.xml$', 'index.php?sitemap=index', 'top');
    add_rewrite_rule('^wp-register\.php$', 'index.php?register=true', 'top');
    add_rewrite_rule('^wp-app\.php(/.*)?$', 'index.php?error=403', 'top');
    add_rewrite_rule('^wp-(atom|rdf|rss|rss2|feed|commentsrss2)\\.php$', 'index.php?feed=old', 'top');
    add_rewrite_rule("^$feedregex", 'index.php?feed=$matches[1]', 'top');
    add_rewrite_rule("^$feedregex2", 'index.php?feed=$matches[1]', 'top');
    add_rewrite_rule("^embed/?$", 'index.php?embed=true', 'top');
    add_rewrite_rule('^page/?([0-9]{1,})/?$', 'index.php?paged=$matches[1]', 'top');
    add_rewrite_rule("^comments/$feedregex", 'index.php?feed=$matches[1]&withcomments=1', 'top');
    add_rewrite_rule("^comments/$feedregex2", 'index.php?feed=$matches[1]&withcomments=1', 'top');
    add_rewrite_rule('^comments/embed/?$', 'index.php?pagename=comments&embed=true', 'top');
    // A segment followed by /trackback is read as suburb_slug with tb=1.
    add_rewrite_rule('([^/]+)/trackback/?$', 'index.php?suburb_slug=$matches[1]&tb=1', 'top');
    // A segment, optionally followed by a numeric segment, is read as
    // suburb_slug plus an optional page number.
    add_rewrite_rule('([^/]+)(?:/([0-9]+))?/?$', 'index.php?suburb_slug=$matches[1]&page=$matches[2]', 'top');

    // Fix %suburb_slug% rules.
    // The bare base words (comments, author, tag, category) under a suburb
    // are mapped to pages with the same name.
    add_rewrite_rule('([^/]+)/comments/?$', 'index.php?suburb_slug=$matches[1]&pagename=comments', 'bottom');
    add_rewrite_rule('([^/]+)/comments/embed/?$', 'index.php?suburb_slug=$matches[1]&pagename=comments&embed=true', 'bottom');
    add_rewrite_rule('([^/]+)/author/?$', 'index.php?suburb_slug=$matches[1]&pagename=author', 'bottom');
    add_rewrite_rule('([^/]+)/tag/?$', 'index.php?suburb_slug=$matches[1]&pagename=tag', 'bottom');
    add_rewrite_rule('([^/]+)/category/?$', 'index.php?suburb_slug=$matches[1]&pagename=category', 'bottom');
}

/**
 * Customise rewrite rules.
 *
 * Works in three passes over the generated rule list:
 *  1. For custom post types with archives, replace the literal %suburb_slug%
 *     tag left in rule keys with the regex `([^/]+)`, keeping key positions.
 *  2. If any key was rewritten, reset the query of the archive, feed and
 *     paged archive rules for those post types so `$matches` indexes line up
 *     with the extra capture group.
 *  3. Drop rules whose key or query contains one of the listed substrings.
 *
 * @param array $rules Rewrite rules as regex => query string.
 * @return array The adjusted rewrite rules.
 */
function tsg_customise_rewrite_rules_array(array $rules): array
{
    // Only non-builtin post types that declare an archive are considered.
    $post_types = get_post_types([
        'has_archive' => true,
        '_builtin' => false
    ]);

    // Custom post types don't apply the filter to replace structure tags
    // with regex for the array keys. Substitute our own,
    // without changing the array key position.
    $is_wrong = false;
    foreach ($post_types as $post_type) {
        // foreach iterates over a snapshot, so reassigning $rules inside is safe.
        foreach ($rules as $rule => $query) {
            if (strpos($rule, "%suburb_slug%/$post_type") !== false) {
                $is_wrong = true;
                $new_rule = str_replace('%suburb_slug%', '([^/]+)', $rule);
                $rules = tsg_replace_key($rules, $rule, $new_rule);
            }
        }
    }

    // After the above fix, the queries are messed up, correct them.
    if ($is_wrong) {
        foreach ($post_types as $post_type) {
            foreach ($rules as $rule => $query) {
                // Only touch the post type's own archive rules; skip category,
                // single-item and "page/?" variants.
                if (
                    strpos($rule, "([^/]+)/$post_type") !== false &&
                    strpos($rule, "([^/]+)/$post_type/category") === false &&
                    strpos($rule, "([^/]+)/$post_type/([^/]+)") === false &&
                    strpos($rule, "([^/]+)/$post_type/page/?") === false
                ) {
                    if (strpos($rule, $post_type . '/?$') !== false) {
                        // Plain archive.
                        $rules[$rule] =
                            'index.php?suburb_slug=$matches[1]&post_type=' . $post_type;
                    } elseif (strpos($rule, '/feed') !== false || strpos($rule, '/(feed') !== false) {
                        // Archive feed.
                        $rules[$rule] =
                            'index.php?suburb_slug=$matches[1]&post_type=' . $post_type . '&feed=$matches[2]';
                    } elseif (strpos($rule, '/page') !== false) {
                        // Paged archive.
                        $rules[$rule] =
                            'index.php?suburb_slug=$matches[1]&post_type=' . $post_type . '&paged=$matches[2]';
                    }
                }
            }
        }
    }

    // Remove rewrite rules that don't work,
    // or that we can't be bothered dealing with.
    // Note: these are plain substrings checked with strpos(), not regexes.
    $remove_by_rule = [
        'amp_',
        'type',
        'search',
        '.*wp-(atom|rdf|rss|rss2|feed|commentsrss2)\\.php$',
        '.*wp-app\\.php(/.*)?$',
        '.*wp-register.php$'
    ];
    $remove_by_query = [
        'attachment',
        '?&'
    ];
    foreach ($rules as $rule => $query) {
        foreach ($remove_by_rule as $match) {
            if (strpos($rule, $match) !== false) {
                unset($rules[$rule]);
            }
        }
        foreach ($remove_by_query as $match) {
            if (strpos($query, $match) !== false) {
                unset($rules[$rule]);
            }
        }
    }

    return $rules;
}

/**
 * Fix request query variables for root pages.
 *
 * The catch-all rewrite rules treat the first URL segment as a suburb slug.
 * When that segment is not a known suburb, it is reinterpreted as a page
 * path: either the page itself, or the parent of the already parsed
 * `pagename`.
 *
 * @param array $query_vars Parsed request query variables.
 * @return array The (possibly adjusted) query variables.
 */
function tsg_customise_request_query_vars(array $query_vars): array
{
    $requested_slug = $query_vars['suburb_slug'] ?? null;

    // A request such as ?suburb_slug[]=x yields an array here; passing that
    // to array_key_exists() throws a TypeError on PHP 8, so leave anything
    // that is not a string untouched.
    if (!is_string($requested_slug)) {
        return $query_vars;
    }

    // Known suburb: nothing to fix.
    if (array_key_exists($requested_slug, tsg_get_all_suburbs_by_slug())) {
        return $query_vars;
    }

    if (!isset($query_vars['pagename'])) {
        // Fix root pages.
        $query_vars['pagename'] = $requested_slug;
    } else {
        // Fix root child pages.
        $query_vars['pagename'] = $requested_slug . '/' . $query_vars['pagename'];
    }

    return $query_vars;
}

/**
 * Force a 404 for single-post requests whose `pagename` is not a suburb.
 *
 * The %suburb_slug%/%pagename% query variable has no effect on the
 * 404 status of posts for some reason, so the status is set by hand.
 *
 * @return void
 */
function tsg_verify_query_vars(): void
{
    global $wp_query;

    $page_slug = get_query_var('pagename');

    // Only single-post requests that carry a pagename are of interest.
    if (!$page_slug || !is_single()) {
        return;
    }

    if (array_key_exists($page_slug, tsg_get_all_suburbs_by_slug())) {
        return;
    }

    $wp_query->set_404();
    status_header(404);
    nocache_headers();
}

/**
 * Register `suburb_slug` as a public query variable.
 *
 * @param array $public_query_vars Existing public query variable names.
 * @return array The list with `suburb_slug` appended.
 */
function tsg_add_query_var(array $public_query_vars): array
{
    array_push($public_query_vars, 'suburb_slug');

    return $public_query_vars;
}

/**
 * Expose the %suburb_slug% structure tag on the Permalinks admin page.
 *
 * @param array $tags Available structure tags as tag name => description.
 * @return array The tags including `suburb_slug`.
 */
function tsg_add_permalink_structure_tag(array $tags): array
{
    $description = __('%s (The slug of the Suburb.)');
    $tags['suburb_slug'] = $description;

    return $tags;
}

/**
 * Substitute the %suburb_slug% tag in a link with the current suburb slug.
 *
 * @param string $link Link that may still contain the %suburb_slug% tag.
 * @return string The link with the tag replaced.
 */
function tsg_customise_link(string $link): string
{
    $current_slug = tsg_get_current_suburb_slug();

    return str_replace('%suburb_slug%', $current_slug, $link);
}

/**
 * Swap the main suburb path segment in a link for the root, or for another slug.
 *
 * With the default `$slug` of '/', "/MAIN_SUBURB_SLUG/" collapses to "/".
 * Any other value is wrapped in slashes and used as the replacement segment.
 *
 * @param string $link Link to rewrite.
 * @param string $slug Replacement slug, or '/' for the site root.
 * @return string The rewritten link.
 */
function tsg_customise_link_slug(string $link, string $slug = '/'): string
{
    $main_segment = '/' . MAIN_SUBURB_SLUG . '/';
    $replacement = ($slug === '/') ? $slug : "/$slug/";

    return str_replace($main_segment, $replacement, $link);
}

/**
 * Resolve a page link: replace %suburb_slug%, then strip the main suburb slug.
 *
 * @param string $link Page link that may contain the %suburb_slug% tag.
 * @return string The final page link.
 */
function tsg_customise_page_link(string $link): string
{
    $with_suburb = tsg_customise_link($link);

    return tsg_customise_link_slug($with_suburb);
}

/**
 * Grab the slug of the current suburb.
 *
 * Uses the `suburb_slug` query variable when it names a known suburb and
 * falls back to MAIN_SUBURB_SLUG otherwise.
 *
 * @param bool $echo Echo the slug instead of returning it.
 * @return string|void The slug, or nothing when `$echo` is true.
 */
function tsg_get_current_suburb_slug(bool $echo = false)
{
    $slug = MAIN_SUBURB_SLUG;

    $query_var = get_query_var('suburb_slug');

    // The query variable comes from the request and may be an array
    // (?suburb_slug[]=x). array_key_exists() throws a TypeError for array
    // keys on PHP 8, so only plain, non-empty strings are looked up.
    if (
        is_string($query_var) &&
        $query_var !== '' &&
        array_key_exists($query_var, tsg_get_all_suburbs_by_slug())
    ) {
        $slug = $query_var;
    }

    if ($echo) {
        echo $slug;
    } else {
        return $slug;
    }
}

/**
 * Tell whether a slug, or the current suburb when none is given, is the main suburb.
 *
 * @param string $slug Slug to test; an empty value means "use the current suburb".
 * @return bool True when the slug equals MAIN_SUBURB_SLUG.
 */
function tsg_is_main_suburb(string $slug = ''): bool
{
    $candidate = $slug ?: tsg_get_current_suburb_slug();

    return $candidate === MAIN_SUBURB_SLUG;
}

/**
 * Look up the term object for the current suburb.
 *
 * @return WP_Term object or false on failure
 */
function tsg_get_current_suburb()
{
    $current_slug = tsg_get_current_suburb_slug();

    return tsg_get_suburb_by_slug($current_slug);
}

/**
 * Fetch a suburb term from the slug-indexed suburb list.
 *
 * @param string $slug Suburb slug to look up.
 * @return WP_Term object or false on failure
 */
function tsg_get_suburb_by_slug(string $slug)
{
    $suburbs_by_slug = tsg_get_all_suburbs_by_slug();

    return $suburbs_by_slug[ $slug ] ?? false;
}

/**
 * Returns a list of Suburbs in the form of
 * [
 *     term_id => WP_Term {},
 *     ...
 * ]
 *
 * The list is cached in the non-autoloaded `tsg_suburbs` option. When the
 * taxonomy has no terms and MAIN_SUBURB_SLUG is defined, the main suburb
 * term is created first.
 *
 * @return array Suburb terms keyed by term ID; empty when the terms cannot be loaded.
 */
function tsg_get_all_suburbs(): array
{
    $cached = get_option('tsg_suburbs');

    // Only trust a non-empty array; anything else is rebuilt.
    if (!empty($cached) && is_array($cached)) {
        return $cached;
    }

    $term_query = [
        'taxonomy' => 'suburb',
        'hide_empty' => false
    ];
    $terms = get_terms($term_query);

    if (empty($terms) && defined('MAIN_SUBURB_SLUG')) {
        wp_insert_term(MAIN_SUBURB_SLUG, 'suburb');

        $terms = get_terms($term_query);
    }

    // get_terms() returns a WP_Error when the taxonomy is not registered.
    // Iterating it would cache garbage, so bail out without caching.
    if (is_wp_error($terms) || !is_array($terms)) {
        return [];
    }

    $result = [];
    foreach ($terms as $term) {
        $result[ $term->term_id ] = $term;
    }

    update_option('tsg_suburbs', $result, false);

    return $result;
}

/**
 * Returns the Suburbs indexed by slug:
 * [
 *     term_slug => WP_Term {},
 *     ...
 * ]
 *
 * The map is cached in the non-autoloaded `tsg_suburbs_by_slug` option and
 * rebuilt from tsg_get_all_suburbs() whenever the cached value is empty.
 *
 * @return array
 */
function tsg_get_all_suburbs_by_slug(): array
{
    $cached = get_option('tsg_suburbs_by_slug');

    if ($cached) {
        return $cached;
    }

    $suburbs_by_slug = [];
    foreach (tsg_get_all_suburbs() as $suburb) {
        $suburbs_by_slug[ $suburb->slug ] = $suburb;
    }

    update_option('tsg_suburbs_by_slug', $suburbs_by_slug, false);

    return $suburbs_by_slug;
}

/**
 * Clear cached suburb lists.
 *
 * Runs whenever a suburb term is created, edited or deleted so the option
 * caches are rebuilt on the next read. Without the delete hook a removed
 * suburb would remain in the cached lists.
 * Also flushes W3 Total Cache when it is available.
 *
 * @return void
 */
add_action('edited_suburb', 'tsg_update_suburb', 11);
add_action('create_suburb', 'tsg_update_suburb', 11);
add_action('delete_suburb', 'tsg_update_suburb', 11);
function tsg_update_suburb(): void
{
    delete_option('tsg_suburbs');
    delete_option('tsg_suburbs_by_slug');

    if (function_exists('w3tc_flush_all')) {
        w3tc_flush_all();
    }
}

/**
 * Register the hierarchical `suburb` taxonomy for posts.
 *
 * The taxonomy gets no rewrite rules or public queries of its own; it is
 * shown in the REST API and can be assigned by anyone who can edit posts.
 *
 * @return void
 */
add_action('after_setup_theme', function (): void {
    $labels = [
        'name' => 'Suburbs',
        'singular_name' => 'Suburb',
        'add_new' => 'Add New',
        'add_new_item' => 'Add New Suburb',
        'edit_item' => 'Edit Suburb',
        'new_item' => 'New Suburb',
        'view_item' => 'View Suburb',
        'search_items' => 'Search Suburbs',
        'not_found' => 'Nothing Found',
        'not_found_in_trash' => 'Nothing found in the Trash',
        'parent_item_colon' => ''
    ];

    $taxonomy_args = [
        'label' => 'Suburbs',
        'labels' => $labels,
        'publicly_queryable' => false,
        'has_archive' => false,
        'rewrite' => false,
        'hierarchical' => true,
        'show_in_nav_menus' => false,
        'show_tagcloud' => false,
        'show_admin_column' => false,
        'capabilities' => [ 'assign_terms' => 'edit_posts' ],
        'show_in_rest' => true
    ];

    register_taxonomy('suburb', [ 'post' ], $taxonomy_args);
});

/**
 * Limit posts to just those for the current suburb.
 *
 * Admin queries and nav menu item queries are left alone. When the current
 * suburb cannot be resolved the query is turned into a 404. The main suburb
 * sees every post; any other suburb gets a tax query on its slug.
 *
 * @param WP_Query $query Query object before WP_Query is called.
 * @return WP_Query
 */
add_action('pre_get_posts', 'tsg_pre_get_posts');
function tsg_pre_get_posts(WP_Query $query): WP_Query
{
    if ($query->is_admin) {
        return $query;
    }

    // Bypass this entirely for menus.
    if (
        isset($query->query[ 'post_type' ]) &&
        $query->query[ 'post_type' ] === 'nav_menu_item'
    ) {
        return $query;
    }

    $suburb = tsg_get_current_suburb();

    if (empty($suburb)) {
        $query->set('pagename', '');
        $query->set_404();
        status_header(404);
        nocache_headers();

        return $query;
    }

    // Main suburb sees all posts.
    if (tsg_is_main_suburb($suburb->slug)) {
        return $query;
    }

    // Apply the tax query.
    // The taxonomy is registered as 'suburb' (singular). Querying the
    // non-existent 'suburbs' taxonomy would match no posts at all.
    $query->set('tax_query', [
        'relation' => 'AND',
        [
            'taxonomy' => 'suburb',
            'field' => 'slug',
            'include_children' => false,
            'terms' => [ tsg_get_current_suburb_slug() ],
            'operator' => 'IN'
        ]
    ]);

    return $query;
}

/**
 * Debug rewrite rules.
 * Based on: https://gist.github.com/adamrosloniec/e34fcc7a0743769c75db1b072d677946
 *
 * Echoes the full rewrite rule table, the permalink structures and the
 * parsed request, highlighting whatever matched the current request.
 * Output is produced only when DEBUG_REWRITES is defined and truthy, and
 * only for logged-in users on the front end.
 *
 * @param WP $query The WP environment instance passed by `parse_request`.
 * @return void
 */
function tsg_debug_rewrites(WP $query): void
{
    global $wp_rewrite, $wp_post_types, $wp_taxonomies;

    // This callback is always hooked, so the constant may be missing from
    // wp-config.php. Reading an undefined constant is a fatal Error on
    // PHP 8, hence the defined() guard.
    if (!defined('DEBUG_REWRITES') || empty(DEBUG_REWRITES)) {
        return;
    }

    if (is_admin() || !is_user_logged_in()) {
        return;
    }

    echo '<p><strong>--- START REWRITE DEBUG ---</strong></p>';

    echo '<h2>Rewrite Rules</h2><table style="font-size:1em;">' .
        '<tr><th align="left">Rule</th><th align="left">Query</th></tr>';
    foreach ($wp_rewrite->wp_rewrite_rules() as $rule => $match) {
        $rewrite_bg = $rule === $query->matched_rule ? 'style="background:yellow;"' : '';
        echo "<tr $rewrite_bg><td>" .
            var_export($rule, true) . "</td><td>$match</td></tr>";
    }
    echo '</table>';

    echo '<h2>Permalink Structure</h2><table style="font-size:1em;">' .
        '<tr><th align="left" colspan="2">Post Type</th></tr>' .
        '<tr><td>Page</td><td>' . $wp_rewrite->get_page_permastruct() . '</td></tr>' .
        "<tr><td>Post</td><td>$wp_rewrite->permalink_structure</td></tr>";
    foreach ($wp_post_types as $post_type) {
        if (
            !empty($post_type->name) &&
            !empty($post_type->label) &&
            // @phpstan-ignore-next-line
            !empty($post_type->rewrite['slug'])
        ) {
            $post_type_bg = !empty($query->query_vars['post_type']) &&
                $post_type->name === $query->query_vars['post_type']
                ? 'style="background:yellow;"'
                : '';
            echo "<tr $post_type_bg><td>$post_type->label</td><td>" .
                $post_type->rewrite['slug'] . '</td></tr>';
        }
    }
    echo '<tr><th align="left" colspan="2">Taxonomy</th></tr>';
    foreach ($wp_taxonomies as $taxonomy) {
        if (
            !empty($taxonomy->name) &&
            // @phpstan-ignore-next-line
            !empty($taxonomy->labels->singular_name) &&
            // @phpstan-ignore-next-line
            !empty($taxonomy->rewrite['slug'])
        ) {
            $taxonomy_bg = !empty($query->query_vars[$taxonomy->name])
                ? 'style="background:yellow;"'
                : '';
            echo "<tr $taxonomy_bg><td>{$taxonomy->labels->singular_name}</td><td>" .
                $wp_rewrite->get_extra_permastruct($taxonomy->name) . '</td></tr>';
        }
    }
    echo '<tr><th align="left" colspan="2">Archive</th></tr>';
    $author_bg = !empty($query->query_vars['author_name']) ? 'style="background:yellow;"' : '';
    echo "<tr $author_bg><td>Author</td><td>" .
            $wp_rewrite->get_author_permastruct() . '</td></tr>';

    // Highlight the Date row for date archives: a date component is present
    // but no post name. (Single posts carry date components too.)
    $is_date_request = empty($query->query_vars['name']) && (
        !empty($query->query_vars['year']) ||
        !empty($query->query_vars['monthnum']) ||
        !empty($query->query_vars['day'])
    );
    $date_bg = $is_date_request ? 'style="background:yellow;"' : '';
    echo "<tr $date_bg><td>Date</td><td>" .
            $wp_rewrite->get_date_permastruct() . '</td></tr>';
    echo '<tr><th align="left" colspan="2">Feed</th></tr>';
    echo "<tr><td>Feed</td><td>" .
            $wp_rewrite->get_feed_permastruct() . '</td></tr>';
    echo "<tr><td>Comments</td><td>" .
            $wp_rewrite->get_comment_feed_permastruct() . '</td></tr>';
    echo '</table>';

    echo '<h2>Request</h2><p>' .
        var_export($query->request, true) . '</p>';

    $matched_bg = !empty($query->matched_rule) ? 'style="background:yellow;"' : '';
    echo "<h2>Matched Rewrite Rule</h2><p $matched_bg>" .
        var_export($query->matched_rule, true) . '</p>';

    echo '<h2>Matched Query</h2><p>' .
        var_export($query->matched_query, true) . '</p>';

    echo '<h2>Query Variables</h2><p>' .
        var_export($query->query_vars, true) . '</p>';

    echo '<p><strong>--- END REWRITE DEBUG ---</strong></p>';
}

/**
 * Build a regex to match the feed section of URLs, something like (feed|atom|rss|rss2)/?
 * Based on: https://github.com/WordPress/WordPress/blob/master/wp-includes/class-wp-rewrite.php#L873
 *
 * Version 2 is the bare alternation, so <permalink>/atom matches.
 * Any other version prefixes the feed base, so <permalink>/feed/atom matches.
 *
 * @param int $version 2 for the bare pattern, anything else for the prefixed one.
 * @return string
 */
function tsg_get_feedregex(int $version = 1): string
{
    global $wp_rewrite;

    // Join the registered feed names into one alternation group.
    $alternatives = trim(implode('|', (array)$wp_rewrite->feeds), '|');
    $bare_pattern = '(' . $alternatives . ')/?$';

    return $version === 2
        ? $bare_pattern
        : $wp_rewrite->feed_base . '/' . $bare_pattern;
}

/**
 * Rename one array key while keeping every element in its original position.
 * Based on: https://stackoverflow.com/a/8884153
 *
 * @param array $array Array to operate on.
 * @param mixed $old_key Key to rename; compared strictly against the existing keys.
 * @param mixed $new_key Key to put in its place.
 * @return array The array with the key renamed, or unchanged when `$old_key` is absent.
 */
function tsg_replace_key(array $array, $old_key, $new_key): array
{
    $ordered_keys = array_keys($array);
    $position = array_search($old_key, $ordered_keys, true);

    if ($position !== false) {
        $ordered_keys[$position] = $new_key;
        $array = array_combine($ordered_keys, array_values($array));
    }

    return $array;
}

https://gist.github.com/ThatStevensGuy/39e92db4d38c8b763f55856f38e9f3e0

Performance Issues

The results of all suburb taxonomy terms are stored in a WordPress option. WordPress options are cached for fast retrieval. If you combine W3 Total Cache with dynamic content caching, such as Cloudflare APO, you should have no performance issues.

Custom Post Types

The current version of WordPress (5.7.2) does not apply a filter to replace permalink structure tags with regex for custom post types. I have provided a filter in the sample code that corrects this issue.

WordPress Logo

WordPress Multisite unfiltered_html Capability

User roles other than Super Admin cannot be assigned the unfiltered_html capability in WordPress Multisite. You can set it, but then WordPress disables the capability after the fact.

This creates a unique WordPress challenge. Your site admins will not have the ability to add a hard coded iframe or embed to a post.

Solution

WordPress does not offer an easy function to find a user's role. I have provided a function to achieve that as part of the solution.

/**
 * Simulate the unfiltered_html capability for editors.
 *
 * On `admin_init`, removes the KSES content filters when the current user
 * has the `editor` role.
 *
 * @return void
 */
add_action('admin_init', 'tsg_kses_remove_filters');
function tsg_kses_remove_filters(): void
{
    $current_user = wp_get_current_user();

    if (!tsg_user_has_role('editor', $current_user)) {
        return;
    }

    kses_remove_filters();
}

/**
 * Check if a user has a role.
 *
 * Accepts a user object or a user ID. When no user is given the current
 * user is checked.
 *
 * @param string $role Role name to look for.
 * @param null|WP_User $user User to check; defaults to the current user.
 * @return bool True when the role is among the user's roles.
 */
function tsg_user_has_role(string $role = '', $user = null): bool
{
    // Reduce an object to its ID so the user is always loaded the same way.
    $user_id = is_object($user) ? $user->ID : $user;

    $account = $user_id ? new WP_User($user_id) : wp_get_current_user();

    return !empty($account->roles) && in_array($role, $account->roles);
}
WordPress Logo

Automatic Featured Image for WordPress Posts

The following script will automatically set the first image in a WordPress post as the Featured Image when the post is saved. You can override the functionality by selecting a Featured Image.

/**
 * @package WordPress
 * @subpackage Automatic Featured Image for WordPress Posts
 * @author That Stevens Guy
 * @phpcs:disable PSR1.Files.SideEffects
 */

/**
 * React to post status transitions.
 *
 * Outside REST requests the handler runs straight away. During a REST
 * request it is postponed until "rest_after_insert_{$post->post_type}"
 * fires for the same post.
 *
 * @param string $new_status
 * @param string $old_status
 * @param WP_Post $post
 * @return void
 */
add_action('transition_post_status', function (string $new_status, string $old_status, WP_Post $post): void {
    $is_rest_request = defined('REST_REQUEST') && REST_REQUEST;

    if (!$is_rest_request) {
        tsg_transition_post_status($new_status, $old_status, $post);
        return;
    }

    $published_post = $post;

    // Deferred handler. It receives the inserted post, the REST request and
    // the "creating" flag, and ignores any post other than the one that
    // triggered this transition.
    add_action("rest_after_insert_{$post->post_type}", function (
        WP_Post $post,
        WP_REST_Request $request,
        bool $creating
    ) use (
        $new_status,
        $old_status,
        $published_post
    ): void {
        if ($published_post->ID === $post->ID) {
            tsg_transition_post_status($new_status, $old_status, $post);
        }
    }, 10, 3);
}, 10, 3);

/**
 * Transition post status function.
 *
 * Single entry point for work done on a status transition. At present it
 * only assigns the automatic Featured Image; the status arguments are
 * accepted but not used.
 *
 * @param string $new_status Status the post is moving to (unused).
 * @param string $old_status Status the post is moving from (unused).
 * @param WP_Post $post The post being transitioned.
 * @return void
 */
function tsg_transition_post_status(string $new_status, string $old_status, WP_Post $post): void
{
    // Set the first image in post_content as the Featured Image. If one wasn't set.
    tsg_set_featured_image($post);
}

/**
 * Assign the first suitable content image as the Featured Image.
 *
 * Applies only to the `post` post type, and only when no thumbnail is set.
 *
 * @param WP_Post $post Post to update.
 * @return void
 */
function tsg_set_featured_image(WP_Post $post): void
{
    $supported_post_types = [ 'post' ];

    // Skip unsupported post types, and posts whose thumbnail was set manually.
    if (!in_array($post->post_type, $supported_post_types) || has_post_thumbnail($post)) {
        return;
    }

    $lookup_options = [
        'get_first_attachment_id' => true,
        'check_aspect_ratio' => true
    ];
    $matches = tsg_get_image_attachment_ids_from_post_content($post, $lookup_options);

    if (empty($matches[ 0 ])) {
        return;
    }

    update_post_meta($post->ID, '_thumbnail_id', $matches[ 0 ]);
}

/**
 * Get image attachment ids from post content.
 *
 * Walks the images found in the post content, keeps those hosted on this
 * site, maps each one back to a post ID through its full-size URL and
 * optionally filters by aspect ratio.
 *
 * Supported `$args`:
 *  - get_first_attachment_id (bool)  Stop after the first match. Default false.
 *  - check_aspect_ratio (bool)       Reject images outside the ratios below. Default false.
 *  - horizontal_aspect_ratio (float) Maximum width / height. Default 2.5.
 *  - vertical_aspect_ratio (float)   Maximum height / width. Default 2.5.
 *
 * @param WP_Post $post Post whose content is scanned.
 * @param array $args Options, see above.
 * @return array List of matching post IDs, in content order.
 */
function tsg_get_image_attachment_ids_from_post_content(WP_Post $post, array $args = []): array
{
    $args = array_merge([
        'get_first_attachment_id' => false,
        'check_aspect_ratio' => false,
        'horizontal_aspect_ratio' => 2.5,
        'vertical_aspect_ratio' => 2.5
    ], $args);

    $attachment_ids = [];

    $images = tsg_get_images_from_post_content($post);

    if (empty($images)) {
        return $attachment_ids;
    }

    // Without a host there is nothing to compare image URLs against.
    $site_host = parse_url(site_url(), PHP_URL_HOST);

    if (empty($site_host)) {
        return $attachment_ids;
    }

    foreach ($images as $image) {
        if (empty($image[ 'src' ]) || !is_string($image[ 'src' ])) {
            continue;
        }

        // For "scheme://host/first/..." index 3 is the first path segment.
        // Short or relative URLs have no such index, so default to ''
        // instead of triggering an "undefined array key" warning.
        $src_segments = explode('/', $image[ 'src' ]);
        $first_path_segment = $src_segments[ 3 ] ?? '';

        // If the image is NOT from the current site, skip it.
        if (strpos($image[ 'src' ], $site_host . '/' . $first_path_segment) === false) {
            continue;
        }

        $guid = tsg_get_original_image_src($image[ 'src' ]);

        if (empty($guid)) {
            continue;
        }

        $attachment_id = tsg_get_post_id_by_guid($guid);

        if (empty($attachment_id)) {
            continue;
        }

        if ($args[ 'check_aspect_ratio' ]) {
            $attachment_metadata = get_metadata('post', $attachment_id, '_wp_attachment_metadata', true);

            // get_metadata() returns '' when the post has no attachment
            // metadata. Passing that to the array-typed checker would throw
            // a TypeError, so treat missing metadata as "not suitable".
            if (
                !is_array($attachment_metadata) ||
                !tsg_check_image_aspect_ratio(
                    $attachment_metadata,
                    $args[ 'horizontal_aspect_ratio' ],
                    $args[ 'vertical_aspect_ratio' ]
                )
            ) {
                continue;
            }
        }

        $attachment_ids[] = $attachment_id;

        if ($args[ 'get_first_attachment_id' ]) {
            break;
        }
    }

    return $attachment_ids;
}

/**
 * Get the original image source, size 'full'.
 *
 * Strips the "-WIDTHxHEIGHT" thumbnail suffix from the URL. Optionally also
 * strips the WordPress "-e<timestamp>" edit suffix and verifies the result
 * over HTTP.
 *
 * Supported `$args`:
 *  - check_exists (bool)   Return '' when no headers come back or the status line contains 404.
 *  - check_filesize (bool) Return '' when Content-Length is missing or above the limit.
 *  - filesize_limit (int)  Limit in bytes for check_filesize. Default 4000000.
 *  - strip_edit (bool)     Remove a trailing "-e<digits>" edit suffix. Default false.
 *
 * @param string $url Image URL, possibly of a resized or edited variant.
 * @param array $args Options, see above.
 * @return string The derived URL, or '' when a requested check fails.
 */
function tsg_get_original_image_src(string $url, array $args = []): string
{
    if (!$url) {
        return $url;
    }

    $args = array_merge([
        'check_exists' => false,
        'check_filesize' => false,
        'filesize_limit' => 4000000,
        'strip_edit' => false
    ], $args);

    // Strip the thumbnail size at the end of the URL so that we end up with what
    // potentially could be the full size original image source.
    //
    // There is an edge case where the original URL has dimensions in the filename
    // with the same format. These will be skipped, this is unavoidable, particularly
    // for an offsite URL.
    // preg_replace() returns null on a PCRE error; keep the input in that case.
    $url = preg_replace('/\-\d{2,4}[xX]\d{2,4}(\.[a-zA-Z]{2,4})$/', '$1', $url) ?? $url;

    // Strip the edit timestamp for WordPress edited images.
    // Turns out this isn't the best idea, end up with unedited images. But can be used for some things.
    if (!empty($args[ 'strip_edit' ])) {
        if (strpos($url, '-e') !== false) {
            $pathinfo = pathinfo($url);

            if (
                !empty($pathinfo[ 'dirname' ]) &&
                !empty($pathinfo[ 'filename' ]) &&
                !empty($pathinfo[ 'extension' ])
            ) {
                $filename_split = array_reverse(explode('-e', $pathinfo[ 'filename' ]));

                // Only drop the trailing part when it really is a numeric
                // timestamp. The previous is_int((int)...) test was always
                // true and truncated names such as "my-example" to "my".
                if (!empty($filename_split[ 0 ]) && ctype_digit($filename_split[ 0 ])) {
                    unset($filename_split[ 0 ]);
                }

                $url = $pathinfo[ 'dirname' ] . '/' .
                    implode('-e', array_reverse($filename_split)) . '.' . $pathinfo[ 'extension' ];
            }
        }
    }

    // Because we've chopped the URL up so much, we may want to check if the image even exists.
    if (!empty($args[ 'check_exists' ]) || !empty($args[ 'check_filesize' ])) {
        $stream_options = [
            'http' => [
                'user_agent' =>
                    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6"
            ],
            // 'ssl'  => [
            //     'verify_peer' => false,
            //     'verify_peer_name' => false
            // ]
        ];
        $stream_context = stream_context_create($stream_options);
        // Best effort: a failed request yields false and is handled below.
        $headers = @get_headers($url, true, $stream_context);

        if (!empty($args[ 'check_exists' ])) {
            if (empty($headers[ 0 ]) || strpos($headers[ 0 ], '404') !== false) {
                $url = '';
            }
        }

        if ($url && !empty($args[ 'check_filesize' ]) && !empty($args[ 'filesize_limit' ])) {
            $content_length = $headers[ 'Content-Length' ] ?? null;

            // After redirects get_headers() reports repeated headers as an
            // array; the last entry belongs to the final response.
            if (is_array($content_length)) {
                $content_length = end($content_length);
            }

            if (
                empty($content_length) ||
                (int)$content_length > (int)$args[ 'filesize_limit' ]
            ) {
                $url = '';
            }
        }
    }

    return $url;
}

/**
 * Collect the images found in the rendered post content.
 *
 * Each entry holds the full tag under `img`, the URL under `src`, and any
 * non-empty alt/width/height/srcset/sizes attributes under their own names.
 * Only src values ending in jpg, png or jpeg are matched. The result is
 * passed through the `tsg_get_images_from_post_content` filter.
 *
 * @param WP_Post $post Post whose content is scanned.
 * @return array List of image descriptions, in content order.
 */
function tsg_get_images_from_post_content(WP_Post $post): array
{
    if (empty($post->post_content)) {
        return [];
    }

    $images = [];
    $rendered = apply_filters('the_content', $post->post_content);

    preg_match_all('/<img\b[^>]+src=[\'"]([^\'"]+\.(?:jpg|png|jpeg))[\'"][^>]*>/i', $rendered, $tag_matches);

    if (!empty($tag_matches[ 0 ])) {
        foreach ($tag_matches[ 0 ] as $index => $tag) {
            $image = [
                'img' => $tag,
                'src' => $tag_matches[ 1 ][ $index ]
            ];

            // Pull the attributes of interest out of this one tag.
            preg_match_all(
                '/(<img\b|(?!^)\G)[^>]*?\b(alt|width|height|srcset|sizes)=([\'"]?)([^>]*?)\3/i',
                $tag,
                $attr_matches
            );

            if (!empty($attr_matches[ 2 ])) {
                foreach ($attr_matches[ 2 ] as $attr_index => $attr_name) {
                    $attr_value = $attr_matches[ 4 ][ $attr_index ] ?? null;

                    if (!empty($attr_value)) {
                        $image[ $attr_name ] = $attr_value;
                    }
                }
            }

            $images[ $index ] = $image;
        }
    }

    return apply_filters('tsg_get_images_from_post_content', $images, $post);
}

/**
 * Check whether an image fits within the allowed aspect ratios.
 *
 * An image passes when width / height does not exceed the horizontal limit
 * and height / width does not exceed the vertical limit. Missing or empty
 * dimensions fail the check.
 *
 * @param array $image [ 'height' => int, 'width' => int ]
 * @param float $horizontal_aspect_ratio Maximum width / height.
 * @param float $vertical_aspect_ratio Maximum height / width.
 * @return bool
 */
function tsg_check_image_aspect_ratio(
    array $image,
    float $horizontal_aspect_ratio = 2.5,
    float $vertical_aspect_ratio = 2.5
): bool {
    if (empty($image[ 'width' ]) || empty($image[ 'height' ])) {
        return false;
    }

    $width = (int)$image[ 'width' ];
    $height = (int)$image[ 'height' ];

    $is_too_wide = ($width / $height) > $horizontal_aspect_ratio;
    $is_too_tall = ($height / $width) > $vertical_aspect_ratio;

    return !($is_too_wide || $is_too_tall);
}

/**
 * Get post ID by guid.
 *
 * Returns the first post whose `guid` column contains the given string.
 *
 * @param string $guid String to search for inside the guid column.
 * @return int ID if found, 0 if not
 */
function tsg_get_post_id_by_guid(string $guid): int
{
    global $wpdb;

    // INSTR(guid, '') is 1 for every row, so an empty needle would return
    // an arbitrary post.
    if ($guid === '') {
        return 0;
    }

    // Placeholders must not be quoted; wpdb::prepare() adds the quoting.
    $post_id = $wpdb->get_var(
        $wpdb->prepare("
            SELECT ID
            FROM $wpdb->posts
            WHERE INSTR( guid, %s ) > 0
            LIMIT 1
        ", $guid)
    );

    return (int)$post_id;
}

https://gist.github.com/ThatStevensGuy/7020010fe667106f79d2556f386933d0