"""External modules for string primitive."""
import itertools
import math
import random
from .base_primitive import BasePrimitive
[docs]
class String(BasePrimitive):
"""
Primitive that cycles through a library of "bad" strings.
This class originally used a variable 'fuzz_library' containing a list of smart fuzz values
global across all instances. The content was moved to a seclist file, removing the library variable.
:type name: str, optional
:param name: Name, for referencing later. Names should always be provided, but if not, a default
name will be given, defaults to None
:type default_value: str
:param default_value: Value used when the element is not being fuzzed. Should typically
represent a valid value.
:type size: int, optional
:param size: Deprecated. Static size of this field, leave None for dynamic, defaults to None.
Useless with min_len and max_len, kept for retrocompatibility.
:type padding: chr, optional
:param padding: Value to use as padding to fill static field size, defaults to "\\x00"
:type encoding: str, optional
:param encoding: String encoding, ex: utf_16_le for Microsoft Unicode, defaults to utf-8
:type min_len: int, optional
:param min_len: Minimum string length, defaults to 0
:type max_len: int, optional
:param max_len: Maximum string length, defaults to 1000
:type len_unit: str, optional
:param len_unit: Unit used to calculate length, defaults to "bytes", can be "bytes" or "chars".
If "chars", the length is checked in 'mutations', else in 'encode'.
:type fuzzable: bool, optional
:param fuzzable: Enable/disable fuzzing of this primitive, defaults to true
:type seclist_path: string, optional
:param seclist_path: Path to seclist file, defaults to "home_made_seclists/boofuzz.txt".
Opened as utf-8.
:type use_long_strings: bool, optional
:param use_long_strings: Use built-in long strings, defaults to True
:type use_default_value: bool, optional
:param use_default_value: Use default value in the library, defaults to True
:type num_random_generations: int, optional
:param num_random_generations: Number of random string to generate, defaults to 50
:type num_random_mutations: int, optional
:param num_random_mutations: Number of mutations to generate, defaults to 40
"""
# Seed sequences that get repeated to build the long strings.
# Order is significant: the long strings are yielded seed by seed, in this order.
# NOTE(review): the previous comment said this list "has to be sorted to avoid
# duplicates"; mutations() only drops *consecutive* duplicate values.
long_string_seeds = [
    # Single characters
    "C", "1", "<", ">", "'", '"', "/", "\\", "?", "=",
    "a=",
    "&", ".", ",", "(", ")", "]", "[", "%", "*", "-", "+", "{", "}",
    # Control and high characters
    "\x14",
    "\x00",
    "\xFE",  # original note: expands to 4 characters under utf1 (sic; presumably utf16)
    "\xFF",  # original note: expands to 4 characters under utf1 (sic; presumably utf16)
    # Multi-character sequences
    "%\xfe\xf0%\x01\xff",
    "/.",
    "/.:/",
    "/\\",
    "<>",
    "\r\n",
    "\xde\xad\xbe\xef",
]

# Base lengths of the generated long strings. Must stay in ascending order: the
# generators stop at the first length above max_len, and __init__ samples terminator
# positions from the range between two consecutive lengths.
_long_string_lengths = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 32768, 0xFFFF]

# Offsets added to every base length to probe just below / at / just above it.
_long_string_deltas = [-2, -1, 0, 1, 2]

# Additional, much larger lengths tried after the base lengths.
_extra_long_string_lengths = [99999, 100000, 500000, 1000000]

# Repetition factors applied to the default value.
_default_value_multipliers = [2, 10, 100]

# Encodings accepted by the constructor.
_supported_encodings = ["utf-8", "utf-16", "utf-32", "ascii", "latin_1"]
def __init__(
    self, *args, name=None, default_value="", size=None, padding=b"\x00", encoding="utf-8",
    min_len=0, max_len=1000, len_unit="bytes",
    use_long_strings=True, use_default_value=True, **kwargs
):
    """
    Validate and store the string options, then precompute terminator positions.

    Extra positional and keyword arguments are forwarded to the parent initializer.

    :raises ValueError: if the encoding is not one of the supported encodings, if max_len is
        smaller than min_len, or if len_unit is neither "bytes" nor "chars".
    """
    super().__init__(name=name, default_value=default_value, *args, **kwargs)

    # Encoding must be one of the explicitly supported ones
    if encoding not in self._supported_encodings:
        raise ValueError(f"Unsupported encoding: {encoding}")

    # Length bounds: only comparable when both are set
    self.min_len = min_len
    self.max_len = max_len
    bounds_are_set = self.min_len is not None and self.max_len is not None
    if bounds_are_set and self.max_len < self.min_len:
        raise ValueError("max_len must be greater than or equal to min_len")

    self.len_unit = len_unit
    if self.len_unit not in ("bytes", "chars"):
        raise ValueError("len_unit must be 'bytes' or 'chars'")

    # Deprecated static size: when given it overrides both bounds
    self.size = size
    if self.size is not None:
        self.max_len = self.min_len = self.size

    # Padding is always stored as bytes
    self.encoding = encoding
    self.padding = padding.encode(self.encoding) if isinstance(padding, str) else padding

    self._static_num_mutations = None
    self.use_long_strings = use_long_strings
    self.use_default_value = use_default_value

    # A fixed seed keeps the terminator positions, and so the test cases, reproducible.
    rng = random.Random(0)
    self.random_indices = {}
    lower_bound = 0
    # Each length gets a random amount of positions, drawn only from the span between the
    # previous length and itself so that no position is reused across lengths.
    for upper_bound in self._long_string_lengths:
        how_many = rng.randint(1, self._long_string_lengths[0])
        self.random_indices[upper_bound] = rng.sample(range(lower_bound, upper_bound), how_many)
        lower_bound = upper_bound
# Yielders
def _yield_from_file(self):
"""
Load fuzz library from file.
Yields lines from file that are not comments or empty.
Ignore if seclist_path is empty.
:raises : FileNotFoundError if file not found.
"""
if self.seclist_path:
abs_filepath = self._get_seclist_abs_path()
# Open file and yield lines that are not comments or empty
try:
with open(abs_filepath, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
yield line
except FileNotFoundError as exc:
raise FileNotFoundError(f"File not found: {abs_filepath}") from exc
def _yield_long_strings(self):
"""
For every long string seed, yield a number of selectively chosen strings lengths.
Ignore if use_long_strings is False.
"""
if self.use_long_strings:
for sequence in self.long_string_seeds:
for size in [
length + delta
for length, delta
in itertools.product(self._long_string_lengths, self._long_string_deltas)
]:
if self.max_len is None or size <= self.max_len:
data = sequence * math.ceil(size / len(sequence))
yield data[:size]
else:
break
for size in self._extra_long_string_lengths:
if self.max_len is None or size <= self.max_len:
data = sequence * math.ceil(size / len(sequence))
yield data[:size]
else:
break
if self.max_len is not None:
data = sequence * math.ceil(self.max_len / len(sequence))
yield data
for size in self._long_string_lengths:
if self.max_len is None or size <= self.max_len:
s = "D" * size
for loc in self.random_indices[size]:
# Replace character at loc with terminator
yield s[:loc] + "\x00" + s[loc + 1:]
else:
break
def _yield_variable_default_value(self, default_value):
"""
Yield variable mutations of the default value if use_default_value is True.
"""
if self.use_default_value:
for length in self._default_value_multipliers:
value = default_value * length
yield value
# Mutators
def _adjust_mutation_for_size(self, fuzz_value: str):
"""
If the fuzz_value is too long, cut it. If it is too short, pad it.
:type fuzz_value: String
:param fuzz_value: The fuzz_value to adjust
:rtype: String
:return: The adjusted fuzz_value
"""
if self.max_len is not None and len(fuzz_value) > self.max_len:
fuzz_value = fuzz_value[: self.max_len]
if self.min_len is not None and len(fuzz_value) < self.min_len:
fuzz_value = fuzz_value + self.padding * (self.min_len - len(fuzz_value))
return fuzz_value
[docs]
def mutations(self, default_value):
"""
On first round, mutate the primitive by stepping through 4 elements, all optionnals.
The fuzz library, the default value, the long string library, and a file containing a list of strings
:type default_value: str
:param default_value: Default value of element.
:rtype: Generator of strings
:return: Yield a generator of mutated strings.
"""
# If round_type is "library", yield library values
if self.request.parent_session.round_type == "library":
last_val = None
for val in itertools.islice(itertools.chain(
self._yield_variable_default_value(default_value),
self._yield_long_strings(),
self._yield_from_file(),
), self.num_library_elements):
# Get the adjusted mutation for the current value if len_unit 'chars'
if self.len_unit == "chars":
current_val = self._adjust_mutation_for_size(val)
else:
current_val = val
# If the current value is the same as the last value, skip it
if last_val == current_val:
continue
last_val = current_val
yield current_val
# If round_type is "random_mutation", generate random mutations of library values
elif self.request.parent_session.round_type == "random_mutation":
# Get all the values from the itertools.chain
library = list(itertools.chain(
self._yield_variable_default_value(default_value),
self._yield_long_strings(),
self._yield_from_file(),
))
# If the seed index (the round number) is less than or equal to the max_rounds_mutation,
# mutate the character
if self.request.parent_session.seed_index < self.max_rounds_mutation:
# Get the seedth value of the itertools.chain
# seed isn't only used to generate random, but also as an index
current_val = self.get_nth(library, self.request.parent_session.seed_index)
# If the current value is not None, yield the mutated character.
# If it is None, do nothing.
random.seed(self.primitive_seed)
if current_val is not None:
for data in self._mutate_character(current_val):
if self.len_unit == "chars":
yield self._adjust_mutation_for_size(data)
else:
yield data
# If round_type is "random_generation", generate random strings
elif self.request.parent_session.round_type == "random_generation":
random.seed(self.primitive_seed)
for _ in range(self.num_random_generations):
if self.len_unit == "chars":
yield self._adjust_mutation_for_size(self.random_generation())
else:
yield self.random_generation()
# Else, raise an exception
else:
raise ValueError("Invalid mutation type")
[docs]
def encode(self, value, mutation_context=None):
value = value.encode(self.encoding, "replace")
if self.len_unit == "bytes":
value = self._adjust_mutation_for_size(value)
return value
[docs]
def num_mutations(self, default_value):
"""
Calculate and return the total number of mutations for this individual primitive.
Args:
default_value: Default value of element.
Returns:
int: Number of mutated forms this primitive can take
"""
if self.request.parent_session.round_type == "random_generation":
return self.num_random_generations
if self.request.parent_session.round_type == "random_mutation":
return self.num_random_mutations
variable_num_mutations = sum(1 for _
in self._yield_variable_default_value(default_value=default_value))
if self._static_num_mutations is None:
# Counting the number of mutations with default value ""
# results in 0 variable_num_mutations 3 * "" = ""
self._static_num_mutations = sum(1 for _ in self.mutations(default_value=""))
return self._static_num_mutations + variable_num_mutations
def _delete_random_character(self, string_to_mutate: str) -> str:
"""Returns s with a random character deleted"""
# If string is empty, return it
if string_to_mutate == "":
return string_to_mutate
# Choose a random position in the string
pos = random.randint(0, len(string_to_mutate) - 1)
# Remove the character at the chosen position
return string_to_mutate[:pos] + string_to_mutate[pos + 1:]
def _insert_random_character(self, string_to_mutate: str) -> str:
"""Returns s with a random character inserted"""
# Choose a random position in the string
pos = random.randint(0, len(string_to_mutate))
# Choose a random character in ASCII printable range
random_character = chr(random.randrange(32, 127))
# Insert the random character at the chosen position
return string_to_mutate[:pos] + random_character + string_to_mutate[pos:]
def _flip_random_bit(self, string_to_mutate: str):
"""Returns s with a random bit flipped in a random position"""
if string_to_mutate == "":
return string_to_mutate
# Choose a random position in the string
pos = random.randint(0, len(string_to_mutate) - 1)
c = string_to_mutate[pos]
# Choose a random bit to flip
bit = 1 << random.randint(0, 6)
new_c = chr(ord(c) ^ bit)
# Replace the character at the chosen position with the new character
return string_to_mutate[:pos] + new_c + string_to_mutate[pos + 1:]
def _mutate_character(self, string_to_mutate: str):
"""
Takes a raw string and mutates it recursively nbr_mutations times.
Yields the mutated strings, at each iteration.
Args:
string_to_mutate (str): Raw string to mutate.
nbr_mutations (int): Number of recursive mutations to generate.
"""
mutators = [
"_delete_random_character",
"_insert_random_character",
"_flip_random_bit"
]
# Mutates recursively nbr_mutations times
for _ in range(self.num_random_mutations):
mutator = random.choice(mutators)
if mutator == "_delete_random_character":
string_to_mutate = self._delete_random_character(string_to_mutate)
elif mutator == "_insert_random_character":
string_to_mutate = self._insert_random_character(string_to_mutate)
elif mutator == "_flip_random_bit":
string_to_mutate = self._flip_random_bit(string_to_mutate)
yield string_to_mutate
[docs]
def random_generation(self):
"""
Generate random strings of size between self.min_len and self.max_len
return Generated string.
"""
# Get a random length between min_len and max_len
length = random.randint(self.min_len, self.max_len)
# If the encoding is ascii
if self.encoding == "ascii":
return ''.join(chr(random.randint(0x00, 0xFF)) for _ in range(length))
# Generate a string using a range of Unicode characters
unicode_string = ''.join(
chr(random.choice([random.randint(0x0000, 0xFFFF), random.randint(0x010000, 0x10FFFF)]))
for _ in range(length))
return unicode_string
# Getters
[docs]
def get_default_value_multipliers(self):
"""Getter for _default_value_multipliers"""
return self._default_value_multipliers
[docs]
def get_long_string_lengths(self):
"""Getter for _long_string_lengths"""
return self._long_string_lengths
[docs]
def get_long_string_deltas(self):
"""Getter for _long_string_deltas"""
return self._long_string_deltas