352 lines
12 KiB
Python
Executable File
352 lines
12 KiB
Python
Executable File
#!/bin/python
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from scapy.all import *
|
|
from hackingscripts import util
|
|
|
|
|
|
class HttpPacket(ABC):
    """Common base of a parsed HTTP message (request or response).

    Attributes:
        version: HTTP version captured from the start line (text after "HTTP/").
        headers: ``util.CaseInsensitiveDict`` mapping header name to value.
        payload: Bytes following the header block; ``None`` until ``parse``
            fills it in.
    """

    # One "Name: value" header line. The name may not contain CR/LF so that a
    # malformed line without a colon cannot be merged into the next header name.
    _HEADER_LINE = re.compile(r"([^:\r\n]+):\s?(.*)\r\n")

    def __init__(self, version):
        self.version = version
        self.headers = util.CaseInsensitiveDict()
        self.payload = None

    @staticmethod
    def parse(data):
        """Build an ``HttpRequest`` or ``HttpResponse`` from raw message bytes.

        Args:
            data: Bytes of one HTTP message, starting at its start line.

        Returns:
            The parsed packet, or ``None`` when ``data`` contains no CRLF, the
            first line cannot be decoded, or the first line matches neither
            ``HttpRequest.PATTERN`` nor ``HttpResponse.PATTERN``. When the
            blank line terminating the headers is missing (e.g. a truncated
            capture), the complete header lines seen so far are kept and the
            payload is empty.
        """
        line_end = data.find(b"\r\n")
        if line_end < 0:
            return None

        try:
            first_line = data[:line_end + 2].decode()
        except UnicodeDecodeError:
            # Binary data that merely contains a CRLF is not an HTTP message.
            return None

        matches_req = re.match(HttpRequest.PATTERN.decode(), first_line)
        matches_res = re.match(HttpResponse.PATTERN.decode(), first_line)
        if matches_req:
            http_packet = HttpRequest(*matches_req.groups())
        elif matches_res:
            http_packet = HttpResponse(*matches_res.groups())
        else:
            return None

        header_end = data.find(b"\r\n\r\n", line_end)
        if header_end < 0:
            raw_headers = data[line_end + 2:]
            http_packet.payload = b""
        else:
            # Keep the CRLF that ends the last header line so the regex matches it.
            raw_headers = data[line_end + 2:header_end + 2]
            http_packet.payload = data[header_end + 4:]

        # Undecodable header bytes are replaced instead of aborting the parse.
        header_buffer = raw_headers.decode(errors="replace")
        for name, value in HttpPacket._HEADER_LINE.findall(header_buffer):
            http_packet.headers[name] = value

        return http_packet

    @abstractmethod
    def get_file_path(self):
        """Return the path/name under which this message's payload is known."""
        pass
|
|
|
|
|
|
class HttpRequest(HttpPacket):
    """An HTTP request: method, request URI and version plus headers/payload."""

    # Request line; the groups are (method, uri, version).
    PATTERN = b"([A-Z]+) ([^ ]+) HTTP/([0-9.]+)\r\n"

    def __init__(self, method, uri, version):
        super().__init__(version)
        self.method = method
        self.uri = uri

    def __repr__(self):
        # payload stays None until HttpPacket.parse() assigns it; report size 0
        # instead of raising TypeError from len(None).
        payload_size = len(self.payload) if self.payload is not None else 0
        return f"{self.method} {self.uri} HTTP/{self.version}, payload=" + util.human_readable_size(payload_size)

    def get_file_path(self):
        """Return the request URI exactly as it appeared on the request line."""
        return self.uri
|
|
|
|
|
|
class HttpResponse(HttpPacket):
    """An HTTP response: version, status code and status text plus headers/payload.

    Attributes:
        status_code: Status code converted to ``int``.
        status_text: Reason phrase from the status line.
        response_to: The request this response answers, or ``None`` if unknown.
    """

    # Status line; the groups are (version, status_code, status_text).
    PATTERN = b"HTTP/([0-9.]+) ([0-9]+) (.*)\r\n"

    # "filename" parameter of a Content-Disposition header, either quoted
    # (group 1) or unquoted (group 2). The previous greedy "(.*)" swallowed the
    # closing quote and any parameters following the file name.
    _FILENAME_PARAM = re.compile(
        r';\s*filename\s*=\s*(?:"([^"]*)"|([^";]+))',
        re.IGNORECASE,
    )

    def __init__(self, version, status_code, status_text):
        super().__init__(version)
        self.status_code = int(status_code)
        self.status_text = status_text
        self.response_to = None

    def get_file_path(self):
        """Return the best known name for the response payload.

        The ``filename`` parameter of a ``Content-Disposition`` header wins;
        otherwise the file path of the request stored in ``response_to`` is
        used.

        Returns:
            The file name/path, or ``None`` when neither source is available.
        """
        content_disposition = self.headers.get("Content-Disposition", None)
        if content_disposition:
            found = self._FILENAME_PARAM.search(content_disposition)
            if found:
                quoted, unquoted = found.groups()
                file_name = quoted if quoted is not None else unquoted.strip()
                if file_name:
                    return file_name

        if self.response_to:
            return self.response_to.get_file_path()

        return None

    def __repr__(self):
        # payload stays None until HttpPacket.parse() assigns it; report size 0
        # instead of raising TypeError from len(None).
        payload_size = len(self.payload) if self.payload is not None else 0
        return f"HTTP/{self.version} {self.status_code} {self.status_text}, payload=" + util.human_readable_size(payload_size)
|
|
|
|
class PacketIterator:
    """Cursor over ``connection.packets`` supporting peek/pop and pattern search.

    Attributes:
        connection: Object exposing a ``packets`` sequence.
        index: Position of the next packet to be returned.
    """

    def __init__(self, connection):
        self.connection = connection
        self.index = 0

    def __iter__(self):
        # Starting an iteration always rewinds to the first packet.
        self.index = 0
        return self

    def __next__(self):
        if not self.has_more():
            raise StopIteration
        packet = self.connection.packets[self.index]
        self.index += 1
        return packet

    def peek(self):
        """Return the next packet without consuming it, or ``None`` at the end."""
        if not self.has_more():
            return None
        return self.connection.packets[self.index]

    def pop(self):
        """Return the next packet and advance past it, or ``None`` at the end."""
        packet = self.peek()
        # Compare against None explicitly: a falsy packet object must still be
        # consumed, otherwise the caller would receive it again on every call.
        if packet is not None:
            self.index += 1
        return packet

    def find_packet(self, pattern, sock_src=None):
        """Consume packets until one whose TCP payload matches ``pattern``.

        Every examined packet is consumed, including the matching one. If
        nothing matches, the iterator ends up exhausted.

        Args:
            pattern: Bytes regular expression applied to the raw TCP payload.
            sock_src: Optional ``"ip:port"`` string; packets sent from any
                other socket are skipped without being searched.

        Returns:
            ``(packet, first_match, packet_src)`` where ``first_match`` is the
            first element returned by ``re.findall`` and ``packet_src`` is the
            sender as ``"ip:port"``; ``None`` when no packet matches.
        """
        for packet in self.connection.packets[self.index:]:
            self.index += 1
            tcp_packet = packet[TCP]
            ip_hdr = packet[IP]
            packet_src = f"{ip_hdr.src}:{tcp_packet.sport}"
            if sock_src is not None and packet_src != sock_src:
                continue

            payload = bytes(tcp_packet.payload)
            match = re.findall(pattern, payload)
            if match:
                return packet, match[0], packet_src
        return None

    def has_more(self):
        """Return ``True`` while unconsumed packets remain."""
        return self.index < len(self.connection.packets)
|
|
|
|
|
|
class TcpConnection:
    """Packets exchanged between two socket endpoints.

    Attributes:
        sock_a: First endpoint, as passed to the constructor.
        sock_b: Second endpoint, as passed to the constructor.
        packets: Packets in the order they were added.
    """

    def __init__(self, sock_a, sock_b):
        self.sock_a, self.sock_b = sock_a, sock_b
        self.packets = []
        # Running total of the TCP payload lengths of all added packets.
        self._payload_size = 0

    def add_packet(self, packet):
        """Store ``packet`` and add its TCP payload length to the running total."""
        self.packets.append(packet)
        tcp_payload = packet[TCP].payload
        self._payload_size += len(tcp_payload)

    def get_key(self):
        """Return the connection key built by ``TcpConnections._format_key``."""
        return TcpConnections._format_key(self.sock_a, self.sock_b)

    def iterator(self):
        """Return a new ``PacketIterator`` over this connection."""
        return PacketIterator(self)

    def get_other_sock(self, sock):
        """Return ``sock_a`` when ``sock`` equals ``sock_b``, otherwise ``sock_b``."""
        if sock == self.sock_b:
            return self.sock_a
        return self.sock_b

    def __repr__(self):
        packet_count = len(self.packets)
        total_size = util.human_readable_size(self._payload_size)
        return f"{self.get_key()}: {packet_count} packets, {total_size}"
|
|
|
|
|
|
class TcpConnections:
    """Registry of ``TcpConnection`` objects keyed by their endpoint pair.

    All entries are stored under the direction-independent key produced by
    ``_format_key``, so lookups by connection object, by key string and by
    socket pair agree with each other.
    """

    def __init__(self):
        self.connections = {}

    @staticmethod
    def _format_key(sock_a, sock_b):
        """Return ``"x<->y"`` with the two sockets ordered, so that both
        directions of a connection yield the same key."""
        return f"{sock_a}<->{sock_b}" if sock_a < sock_b else f"{sock_b}<->{sock_a}"

    @staticmethod
    def _key_of(item):
        """Return the dictionary key for a connection object or a key string.

        ``str(item)`` must not be used here: a connection's string form
        contains its packet count and size, so it changes over time and never
        equals the keys written by ``add_packet``.
        """
        return item if isinstance(item, str) else item.get_key()

    def __contains__(self, item: "TcpConnection"):
        return self._key_of(item) in self.connections

    def add(self, element: "TcpConnection"):
        """Register ``element`` under its connection key."""
        self.connections[self._key_of(element)] = element

    def __getitem__(self, item: "TcpConnection"):
        return self.connections[self._key_of(item)]

    def __iter__(self):
        return iter(self.connections.values())

    def get_connection(self, sock_a, sock_b):
        """Return the connection between the two sockets.

        Raises:
            KeyError: If no such connection is registered.
        """
        key = self._format_key(sock_a, sock_b)
        return self.connections[key]

    def add_packet(self, sock_src, sock_dst, packet):
        """Append ``packet`` to the matching connection, creating it on first use.

        Returns:
            The connection the packet was added to.
        """
        key = self._format_key(sock_src, sock_dst)
        if key not in self.connections:
            self.connections[key] = TcpConnection(sock_src, sock_dst)

        connection = self.connections[key]
        connection.add_packet(packet)
        return connection
|
|
|
|
|
|
class PcapExtractor:
    """Reassemble HTTP messages from a pcap file and list or extract them.

    Attributes:
        pcap_path: Path of the capture file to read.
        output_dir: Directory that receives extracted payloads.
        filters: Predicates taking a parsed HTTP packet; a packet is kept only
            if every predicate returns a true value.
    """

    def __init__(self, pcap_path, output_dir="extracted_files/", filters=None):
        self.pcap_path = pcap_path
        self.output_dir = output_dir
        self.filters = filters if filters is not None else []
        self._packets = None

    def _open_file(self):
        """Load all packets of the capture into memory."""
        self._packets = rdpcap(self.pcap_path)

    @staticmethod
    def _safe_file_name(file_path):
        """Flatten ``file_path`` into a single file name.

        Returns:
            ``file_path`` with every "/" replaced by "_", or ``None`` when no
            usable name exists (``None``, empty, "." or "..").
        """
        if not file_path:
            return None
        file_name = file_path.replace("/", "_")
        if file_name in (".", ".."):
            return None
        return file_name

    def extract_all(self):
        """Write the payload of every filtered HTTP packet into ``output_dir``.

        Packets without payload are ignored. Packets for which no file name
        can be determined are reported and skipped instead of aborting the
        whole extraction.
        """
        self._open_file()
        http_packets = self._parse_http_packets()
        filtered_packets = self._apply_filters(http_packets)

        # Library callers may not have created the directory beforehand.
        if self.output_dir:
            os.makedirs(self.output_dir, exist_ok=True)

        for packet in filtered_packets:
            if not packet.payload:
                continue

            file_path = packet.get_file_path()
            file_name = self._safe_file_name(file_path)
            if file_name is None:
                print(f"[-] Skipping {packet}: no file name available")
                continue

            with open(os.path.join(self.output_dir, file_name), "wb") as f:
                f.write(packet.payload)

            print(f"[+] Extracted: {file_path} {util.human_readable_size(len(packet.payload))} Bytes")

    def _apply_filters(self, packets):
        """Return the packets accepted by all configured filters, as a list."""
        filtered_packets = packets
        for f in self.filters:
            filtered_packets = filter(f, filtered_packets)
        return list(filtered_packets)

    # NOTE: the method name shadows the builtin only as a class attribute;
    # it is part of the public interface and therefore kept.
    def list(self):
        """Print every filtered HTTP packet found in the capture."""
        self._open_file()
        http_packets = self._parse_http_packets()
        filtered_packets = self._apply_filters(http_packets)
        for packet in filtered_packets:
            print(packet)

    def get_http_packet(self, packet_iterator, sock_src, initial_packet):
        """Reassemble and parse the HTTP message starting at ``initial_packet``.

        Consecutive packets sent by ``sock_src`` are consumed from
        ``packet_iterator`` and their TCP payloads concatenated; the first
        packet from a different sender ends the message and is left in the
        iterator.

        Returns:
            The parsed packet, or ``None`` if the reassembled bytes do not
            form a parsable HTTP message.

        Raises:
            AssertionError: If a segment's sequence number does not increase
                or the IP packet is fragmented; such captures are not
                supported by this reassembly.
        """
        http_buffer = raw(initial_packet[TCP].payload)
        prev_seq = initial_packet[TCP].seq
        # Pending one-byte segment as (seq, payload); see the probe check below.
        buff = None
        while packet_iterator.has_more():
            next_packet = packet_iterator.peek()
            if sock_src == f"{next_packet[IP].src}:{next_packet[TCP].sport}":
                next_packet = packet_iterator.pop()

                if buff is not None:
                    # if there is a buffered package, and the seq. number was not reused
                    if buff[0] != next_packet[TCP].seq:
                        # append this to output
                        http_buffer += buff[1]
                    buff = None

                payload_len = len(next_packet[TCP].payload)
                if next_packet[TCP].seq - prev_seq != payload_len and payload_len == 1:
                    buff = (next_packet[TCP].seq, raw(next_packet[TCP].payload))
                    # potential TCP ZeroWindowProbe
                    continue

                assert next_packet[TCP].seq > prev_seq
                assert next_packet[IP].frag == 0
                http_buffer += raw(next_packet[TCP].payload)
                prev_seq = next_packet[TCP].seq
            else:
                break

        try:
            return HttpPacket.parse(http_buffer)
        except ValueError:
            # Covers UnicodeDecodeError as well: a truncated or non-text
            # message must not abort processing of the whole capture.
            return None

    def _parse_http_packets(self):
        """Group TCP payload packets by connection and parse HTTP exchanges.

        Returns:
            List of parsed requests and responses; each response that follows
            a request on the same connection has ``response_to`` set to it.
        """
        connections = TcpConnections()
        for packet in self._packets:
            # Socket strings are built from the IPv4 header, so TCP packets
            # without an IP layer cannot be handled here.
            if TCP not in packet or IP not in packet:
                continue

            ip_hdr = packet[IP]
            tcp_packet = packet[TCP]
            if len(tcp_packet.payload) == 0:
                continue

            sock_src = f"{ip_hdr.src}:{tcp_packet.sport}"
            sock_dst = f"{ip_hdr.dst}:{tcp_packet.dport}"
            connections.add_packet(sock_src, sock_dst, packet)

        http_packets = []
        for connection in connections:
            packet_iterator = connection.iterator()
            while packet_iterator.has_more():
                request = packet_iterator.find_packet(HttpRequest.PATTERN)
                if not request:
                    continue

                packet, _, sock_src = request
                http_request_packet = self.get_http_packet(packet_iterator, sock_src, packet)
                if http_request_packet is None:
                    # The pattern matched somewhere in the payload, but the
                    # reassembled bytes are not a parsable message.
                    continue
                http_packets.append(http_request_packet)

                other_sock = connection.get_other_sock(sock_src)
                response = packet_iterator.find_packet(HttpResponse.PATTERN, sock_src=other_sock)
                if not response:
                    continue

                packet, _, sock_src = response
                http_response_packet = self.get_http_packet(packet_iterator, sock_src, packet)
                if http_response_packet is None:
                    continue
                http_response_packet.response_to = http_request_packet
                http_packets.append(http_response_packet)

        return http_packets
|
|
|
|
|
|
def build_arg_parser():
    """Create the command line parser of the extractor script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("file", help="Path to pcap file to extract files from")
    parser.add_argument("-o", "--output-dir", help="Path to destination directory", default="extracted_files/",
                        dest="output_dir")
    parser.add_argument("-l", "--list", help="List available files only", default=False, action="store_true")
    # The default must be False: with default=True the flag could never be
    # distinguished from "not given" and --list always hit the conflict check.
    # Extraction is still what happens when neither flag is passed.
    parser.add_argument("-e", "--extract", help="Extract files (default)", default=False, action="store_true")
    parser.add_argument("-ec", "--exclude-codes", help="Exclude http status codes, default: 101,304,403,404",
                        default="101,304,403,404", dest="exclude_codes")
    parser.add_argument("-ic", "--include-codes", help="Limit http status codes", type=str,
                        default="", dest="include_codes")
    parser.add_argument("-fe", "--file-extensions", help="File extensions, e.g. txt,exe,pdf", type=str,
                        default="", dest="file_extensions")
    parser.add_argument("-fn", "--file-name", help="File name, e.g. passwords.txt", type=str,
                        default="", dest="file_name")
    parser.add_argument("-fp", "--file-path", help="File path (uri), e.g. /admin/index.html", type=str,
                        default="", dest="file_path")
    # TODO: ports, ip_addresses...
    return parser


def parse_status_codes(value):
    """Parse a comma separated list of status codes into a set of ints.

    Empty entries are ignored, so an empty string yields an empty set.

    Raises:
        ValueError: If an entry is not an integer.
    """
    return {int(code) for code in value.split(",") if code.strip()}


def file_extension(file_path):
    """Return the lower-cased extension of ``file_path`` without the dot.

    A query string ("?...") is ignored. Returns "" for ``None``, an empty
    path or a path without extension.
    """
    if not file_path:
        return ""
    path = file_path.split("?", 1)[0]
    return os.path.splitext(path)[1].lstrip(".").lower()


def build_filters(args):
    """Translate the parsed command line options into packet predicates.

    Status-code filters only restrict responses; requests always pass them.

    Raises:
        ValueError: If a status code list contains a non-integer entry.
    """
    # Parse the code lists once instead of once per inspected packet.
    exclude_codes = parse_status_codes(args.exclude_codes)
    filters = [
        lambda p: not isinstance(p, HttpResponse) or p.status_code not in exclude_codes,
    ]

    if args.include_codes:
        include_codes = parse_status_codes(args.include_codes)
        filters.append(lambda p: not isinstance(p, HttpResponse) or p.status_code in include_codes)

    if args.file_extensions:
        # Accept "txt" as documented in the help text as well as ".txt".
        extensions = {
            ext.strip().lstrip(".").lower()
            for ext in args.file_extensions.split(",")
            if ext.strip()
        }
        filters.append(lambda p: file_extension(p.get_file_path()) in extensions)

    if args.file_name:
        # get_file_path() may return None for responses without a known name.
        filters.append(lambda p: os.path.basename(p.get_file_path() or "") == args.file_name)

    if args.file_path:
        filters.append(lambda p: p.get_file_path() == args.file_path)

    return filters


def main():
    """Entry point: parse arguments, then list or extract HTTP payloads."""
    parser = build_arg_parser()
    args = parser.parse_args()

    try:
        filters = build_filters(args)
    except ValueError:
        parser.error("status codes must be a comma separated list of integers")

    pcap_path = args.file
    if not os.path.isfile(pcap_path):
        print("[-] File not found or not a file:", pcap_path)
        raise SystemExit(1)

    output_dir = args.output_dir
    listing_only = args.list and not args.extract
    if not listing_only:
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError:
            # Reported through the directory check below.
            pass
        if not os.path.isdir(output_dir):
            print("[-] Output directory is not a directory or does not exist and could not be created:", output_dir)
            raise SystemExit(2)

    pcap_extractor = PcapExtractor(pcap_path, output_dir, filters)
    if args.list and args.extract:
        print("[-] Can only specify one of list or extract, not both")
        raise SystemExit(3)
    elif args.list:
        pcap_extractor.list()
    else:
        pcap_extractor.extract_all()


if __name__ == "__main__":
    main()
|