433 lines
19 KiB
Python
Executable File
433 lines
19 KiB
Python
Executable File
#!/usr/bin/env python3.7
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
TFC - Onion-routed, endpoint secure messaging system
|
|
Copyright (C) 2013-2020 Markus Ottela
|
|
|
|
This file is part of TFC.
|
|
|
|
TFC is free software: you can redistribute it and/or modify it under the terms
|
|
of the GNU General Public License as published by the Free Software Foundation,
|
|
either version 3 of the License, or (at your option) any later version.
|
|
|
|
TFC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with TFC. If not, see <https://www.gnu.org/licenses/>.
|
|
"""
|
|
|
|
import math
|
|
import multiprocessing
|
|
import os.path
|
|
import random
|
|
import time
|
|
|
|
from typing import List, Optional, Tuple
|
|
|
|
from src.common.crypto import argon2_kdf, blake2b, csprng
|
|
from src.common.database import TFCUnencryptedDatabase
|
|
from src.common.encoding import bytes_to_int, int_to_bytes
|
|
from src.common.exceptions import CriticalError, graceful_exit, SoftError
|
|
from src.common.input import pwd_prompt
|
|
from src.common.misc import ensure_dir, reset_terminal, separate_headers
|
|
from src.common.output import clear_screen, m_print, phase, print_on_previous_line
|
|
from src.common.word_list import eff_wordlist
|
|
from src.common.statics import (ARGON2_MIN_MEMORY_COST, ARGON2_MIN_PARALLELISM, ARGON2_MIN_TIME_COST,
|
|
ARGON2_SALT_LENGTH, BLAKE2_DIGEST_LENGTH, DIR_USER_DATA, DONE,
|
|
ENCODED_INTEGER_LENGTH, GENERATE, MASTERKEY_DB_SIZE, MAX_KEY_DERIVATION_TIME,
|
|
MIN_KEY_DERIVATION_TIME, PASSWORD_MIN_BIT_STRENGTH)
|
|
|
|
|
|
class MasterKey(object):
    """Manage the master key and the login database that verifies it.

    The master key is derived from the user's password and a random
    salt with Argon2id. The salt, the BLAKE2b hash of the master key,
    and the Argon2 parameters (time cost, memory cost, parallelism) are
    stored in the unencrypted login database so the same key can be
    re-derived and verified on the next login.
    """

    def __init__(self, operation: str, local_test: bool) -> None:
        """Create a new MasterKey object.

        If the login database already exists, the user is prompted for
        the password and the master key is loaded. Otherwise a new
        master key is created. EOF / Ctrl-C during either prompt leads
        to `graceful_exit()`.
        """
        self.operation = operation
        self.file_name = f'{DIR_USER_DATA}{operation}_login_data'
        self.database = TFCUnencryptedDatabase(self.file_name)
        self.local_test = local_test
        self.database_data = None  # type: Optional[bytes]

        ensure_dir(DIR_USER_DATA)
        try:
            if os.path.isfile(self.file_name):
                self.master_key = self.load_master_key()
            else:
                self.master_key = self.new_master_key()
        except (EOFError, KeyboardInterrupt):
            graceful_exit()

    @staticmethod
    def timed_key_derivation(password: str,
                             salt: bytes,
                             time_cost: int,
                             memory_cost: int,
                             parallelism: int
                             ) -> Tuple[bytes, float]:
        """Derive key and measure its derivation time.

        Returns a tuple `(master_key, kd_time)` where `kd_time` is the
        wall-clock duration of the `argon2_kdf` call in seconds,
        measured with the monotonic clock.
        """
        time_start = time.monotonic()
        master_key = argon2_kdf(password, salt, time_cost, memory_cost, parallelism)
        kd_time = time.monotonic() - time_start

        return master_key, kd_time

    def get_available_memory(self) -> int:
        """Return the amount of available memory in the system.

        The value is the number on the `MemAvailable` line of
        /proc/meminfo (the kernel reports it in kB). During local
        testing the value is halved so that two programs can derive
        their keys on the same machine at the same time.

        Raises IndexError if /proc/meminfo has no `MemAvailable` line.
        """
        pipe = os.popen("/bin/cat /proc/meminfo")
        try:
            fields = pipe.read().splitlines()
        finally:
            # Close the pipe explicitly instead of relying on garbage collection.
            pipe.close()

        field = [f for f in fields if f.startswith("MemAvailable")][0]
        mem_avail = int(field.split()[1])

        if self.local_test:
            mem_avail //= 2

        return mem_avail

    @staticmethod
    def generate_master_password() -> Tuple[int, str]:
        """Generate a strong password using the EFF wordlist.

        Words are drawn with the OS CSPRNG (`random.SystemRandom`) until
        the password's entropy reaches PASSWORD_MIN_BIT_STRENGTH bits.

        Returns a tuple `(bit_strength, password)` where the words of
        the password are separated by single spaces.
        """
        word_space = len(eff_wordlist)
        sys_rand = random.SystemRandom()

        pwd_bit_strength = 0.0
        password_words = []  # type: List[str]

        while pwd_bit_strength < PASSWORD_MIN_BIT_STRENGTH:
            password_words.append(sys_rand.choice(eff_wordlist))
            pwd_bit_strength = math.log2(word_space ** len(password_words))

        password = ' '.join(password_words)

        return int(pwd_bit_strength), password

    def new_master_key(self, replace: bool = True) -> bytes:
        """Create a new master key from password and salt.

        The generated master key depends on a 256-bit salt and the
        password entered by the user. Additional computational strength
        is added by the slow hash function (Argon2id). The more cores
        and the faster each core is, and the more memory the system has,
        the more secure TFC data is under the same password.

        This method automatically tweaks the Argon2 time and memory cost
        parameters according to best practices as determined in

            https://tools.ietf.org/html/draft-irtf-cfrg-argon2-09#section-4

        1) For Argon2 type (y), Argon2id was selected because the
           adversary might be able to run arbitrary code on Destination
           Computer and thus perform a side-channel attack against the
           function.

        2) The maximum number of threads (h) is determined by the number
           available in the system. However, during local testing this
           number is reduced to half to allow simultaneous login to
           Transmitter and Receiver Program.

        3) The maximum amount of memory (m) is what the system has to
           offer. For hard-drive encryption purposes, the recommendation
           is 6GiB. TFC will use that amount (or even more) if available.
           However, on less powerful systems, it will settle for less.

        4) For key derivation time (x), the value is set to at least 3
           seconds, with the maximum being 4 seconds. The minimum value
           is the same as the recommendation for hard-drive encryption.

        5) The salt length is set to 256-bits which is double the
           recommended length. The salt size ensures that even in a
           group of 4.8*10^29 users, the probability that two users
           share the same salt is just 10^(-18).*
            * https://en.wikipedia.org/wiki/Birthday_attack

           The salt does not need additional protection as the security
           it provides depends on the salt space in relation to the
           number of attacked targets (i.e. if two or more physically
           compromised systems happen to share the same salt, the
           attacker can speed up the attack against those systems with
           time-memory-trade-off attack).

        6) The tag length isn't utilized. The result of the key
           derivation is the master encryption key itself, which is set
           to 32 bytes for use in XChaCha20-Poly1305.

        7) Memory wiping feature is not provided.

        To recognize the password is correct, the BLAKE2b hash of the
        master key is stored together with key derivation parameters
        into the login database.
            The preimage resistance of BLAKE2b prevents derivation of
        master key from the stored hash, and Argon2id ensures brute
        force and dictionary attacks against the master password are
        painfully slow even with GPUs/ASICs/FPGAs, as long as the
        password is sufficiently strong.

        If `replace` is True (default), the login data is written to the
        login database immediately. If it is False, the login data is
        only cached in `self.database_data`; it is written later by
        `replace_database_data()`.
        """
        password = MasterKey.new_password()
        salt = csprng(ARGON2_SALT_LENGTH)

        # Determine the amount of memory used from the amount of free RAM in the system.
        memory_cost = self.get_available_memory()

        # Determine the number of threads to use
        parallelism = multiprocessing.cpu_count()
        if self.local_test:
            parallelism = max(ARGON2_MIN_PARALLELISM, parallelism // 2)

        # Determine time cost
        time_cost, kd_time, master_key = self.determine_time_cost(password, salt, memory_cost, parallelism)

        # Determine memory cost
        if kd_time > MAX_KEY_DERIVATION_TIME:
            memory_cost, master_key = self.determine_memory_cost(password, salt, time_cost, memory_cost, parallelism)

        # Store values to database
        database_data = (salt
                         + blake2b(master_key)
                         + int_to_bytes(time_cost)
                         + int_to_bytes(memory_cost)
                         + int_to_bytes(parallelism))

        if replace:
            self.database.store_unencrypted_database(database_data)
        else:
            # When replacing the master key, the new master key needs to be generated before
            # databases are encrypted. However, storing the new master key shouldn't be done
            # before all new databases have been successfully written. We therefore just cache
            # the database data.
            self.database_data = database_data

        print_on_previous_line()
        phase("Deriving master key")
        phase(DONE, delay=1)

        return master_key

    def determine_time_cost(self,
                            password: str,
                            salt: bytes,
                            memory_cost: int,
                            parallelism: int
                            ) -> Tuple[int, float, bytes]:
        """Find suitable time_cost value for Argon2id.

        There are two acceptable time_cost values.

        1. A time_cost value that together with all available memory
           sets the key derivation time between MIN_KEY_DERIVATION_TIME
           and MAX_KEY_DERIVATION_TIME. If during the search we find
           such suitable time_cost value, we accept it as such.

        2. In a situation where no time_cost value is suitable alone,
           there will exist some time_cost value `t` that makes key
           derivation too fast, and another time_cost value `t+1` that
           makes key derivation too slow. In this case we are interested
           in the latter value, as unlike `t`, the value `t+1` can be
           fine-tuned to suitable key derivation time range by adjusting
           the memory_cost parameter.

        As time_cost has no upper limit, and as the amount of available
        memory has tremendous effect on how long one round takes, it's
        difficult to determine the upper bound for a time_cost binary
        search. We therefore start with a single round, and by
        benchmarking it, estimate how many rounds are needed to reach
        the target zone. After every try, we update our time_cost
        candidate based on new average time per round estimate, a value
        that gets more accurate as the search progresses. If this
        method isn't able to suggest a value larger than 1, we increase
        time_cost by 1 anyway to prevent an Alderson loop.

        Every time the time_cost value is increased, we update the lower
        bound to narrow the search space of the binary search we can
        switch to immediately, once the MAX_KEY_DERIVATION_TIME is
        exceeded (i.e. once an upper bound is found). At that point, the
        time_cost `t+1` can be found in log(n) time.

        Returns a tuple `(time_cost, kd_time, master_key)`. The returned
        `kd_time` and `master_key` are always the ones measured/derived
        with the returned `time_cost`, so the caller can safely store
        the key's hash together with that time_cost.
        """
        lower_bound = ARGON2_MIN_TIME_COST  # type: int
        upper_bound = None                  # type: Optional[int]
        time_cost = lower_bound

        # Measurement taken at `upper_bound`. Cached so that when the search
        # concludes that `upper_bound` is the `t+1` value, we can return the
        # key and time that actually belong to it without re-deriving.
        upper_kd_time = 0.0      # type: float
        upper_master_key = b''   # type: bytes

        print(2*'\n')

        while True:
            print_on_previous_line()
            phase(f"Trying time cost {time_cost}")
            master_key, kd_time = self.timed_key_derivation(password, salt, time_cost, memory_cost, parallelism)
            phase(f"{kd_time:.1f}s", done=True)

            # Sentinel that checks if the binary search has ended, and that restarts
            # the search if kd_time repeats. This prevents an Alderson loop.
            if upper_bound is not None and time_cost in [lower_bound, upper_bound]:  # pragma: no cover
                lower_bound = ARGON2_MIN_TIME_COST
                upper_bound = None
                upper_kd_time = 0.0
                upper_master_key = b''
                continue

            if MIN_KEY_DERIVATION_TIME <= kd_time <= MAX_KEY_DERIVATION_TIME:
                break

            if kd_time < MIN_KEY_DERIVATION_TIME:
                lower_bound = time_cost

                if upper_bound is None:
                    avg_time_per_round = kd_time / time_cost
                    time_cost_candidate = math.floor(MAX_KEY_DERIVATION_TIME / avg_time_per_round)
                    time_cost = max(time_cost+1, time_cost_candidate)

                else:
                    if time_cost + 1 == upper_bound:
                        # Current value is `t` (too fast) and upper_bound is `t+1`
                        # (already measured as too slow). Return the key and time
                        # measured for `t+1`: returning the values of `t` with
                        # time_cost `t+1` would store mismatching parameters and
                        # key hash, making the password impossible to verify later.
                        time_cost = upper_bound
                        kd_time = upper_kd_time
                        master_key = upper_master_key
                        break

                    time_cost = math.floor((lower_bound + upper_bound) / 2)

            elif kd_time > MAX_KEY_DERIVATION_TIME:
                upper_bound = time_cost
                upper_kd_time = kd_time
                upper_master_key = master_key

                # Sentinel: If even a single round takes too long, it's the `t+1` we're looking for.
                if time_cost == 1:
                    break

                # Sentinel: If the current time_cost value (that was too large) is one
                # greater than the lower_bound, we know current time_cost is at `t+1`.
                if time_cost == lower_bound + 1:
                    break

                # Otherwise we know the current time_cost is at least two integers greater
                # than `t`. Our best candidate for `t` is lower_bound, but for all we know,
                # `t` might be a much greater value. So we continue binary search for `t+1`
                time_cost = math.floor((lower_bound + upper_bound) / 2)

        return time_cost, kd_time, master_key

    def determine_memory_cost(self,
                              password: str,
                              salt: bytes,
                              time_cost: int,
                              memory_cost: int,
                              parallelism: int,
                              ) -> Tuple[int, bytes]:
        """Determine suitable memory_cost value for Argon2id.

        If we reached this function, it means we found a `t+1` value for
        time_cost (explained in the `determine_time_cost` function). We
        therefore do a binary search on the amount of memory to use
        until we hit the desired key derivation time range.

        The search runs between ARGON2_MIN_MEMORY_COST and the passed
        `memory_cost`; each candidate is rounded to the nearest thousand.

        Returns a tuple `(memory_cost, master_key)` for the first
        candidate whose derivation time is within the target range.
        """
        lower_bound = ARGON2_MIN_MEMORY_COST
        upper_bound = memory_cost

        while True:
            memory_cost = int(round((lower_bound + upper_bound) // 2, -3))

            print_on_previous_line()
            phase(f"Trying memory cost {memory_cost} KiB")
            master_key, kd_time = self.timed_key_derivation(password, salt, time_cost, memory_cost, parallelism)
            phase(f"{kd_time:.1f}s", done=True)

            # If we found a suitable memory_cost value, we accept the key and the memory_cost.
            if MIN_KEY_DERIVATION_TIME <= kd_time <= MAX_KEY_DERIVATION_TIME:
                return memory_cost, master_key

            # The search might fail e.g. if external CPU load causes delay in key
            # derivation, which causes the search to continue into wrong branch. In
            # such a situation the search is restarted. The binary search is problematic
            # with tight key derivation time target ranges, so if the search keeps
            # restarting, increasing MAX_KEY_DERIVATION_TIME (and thus expanding the
            # range) will help finding suitable memory_cost value faster. Increasing
            # MAX_KEY_DERIVATION_TIME slightly affects security (positively) and user
            # experience (negatively).
            if memory_cost == lower_bound or memory_cost == upper_bound:
                lower_bound = ARGON2_MIN_MEMORY_COST
                upper_bound = self.get_available_memory()
                continue

            if kd_time < MIN_KEY_DERIVATION_TIME:
                lower_bound = memory_cost

            elif kd_time > MAX_KEY_DERIVATION_TIME:
                upper_bound = memory_cost

    def replace_database_data(self) -> None:
        """Store cached database data into database.

        Does nothing if no data is cached. The cache is cleared after
        the data has been stored.
        """
        if self.database_data is not None:
            self.database.store_unencrypted_database(self.database_data)
        self.database_data = None

    def load_master_key(self) -> bytes:
        """Derive the master key from password and salt.

        Load the salt, hash, and key derivation settings from the login
        database. Derive the purported master key from the salt and
        entered password. If the BLAKE2b hash of derived master key
        matches the hash in the login database, accept the derived
        master key.

        The password prompt is repeated until the correct password is
        entered. Raises CriticalError if the login database does not
        have the expected size.
        """
        database_data = self.database.load_database()

        if len(database_data) != MASTERKEY_DB_SIZE:
            raise CriticalError(f"Invalid {self.file_name} database size.")

        # Four header lengths are given; the remainder of the data is the fifth field.
        salt, key_hash, time_bytes, memory_bytes, parallelism_bytes \
            = separate_headers(database_data, [ARGON2_SALT_LENGTH, BLAKE2_DIGEST_LENGTH,
                                               ENCODED_INTEGER_LENGTH, ENCODED_INTEGER_LENGTH])

        time_cost = bytes_to_int(time_bytes)
        memory_cost = bytes_to_int(memory_bytes)
        parallelism = bytes_to_int(parallelism_bytes)

        while True:
            password = MasterKey.get_password()
            phase("Deriving master key", head=2, offset=len("Password correct"))
            purp_key = argon2_kdf(password, salt, time_cost, memory_cost, parallelism)

            if blake2b(purp_key) == key_hash:
                phase("Password correct", done=True, delay=1)
                clear_screen()
                return purp_key

            phase("Invalid password", done=True, delay=1)
            print_on_previous_line(reps=5)

    @classmethod
    def new_password(cls, purpose: str = "master password") -> str:
        """Prompt the user to enter and confirm a new password.

        If the user enters the GENERATE keyword instead of a password,
        a password is generated and displayed, and no confirmation is
        asked. Otherwise the password must be entered twice; the prompt
        repeats until both entries match.
        """
        # A loop (instead of recursion) keeps the stack flat no matter
        # how many times the user fails to confirm the password.
        while True:
            password_1 = pwd_prompt(f"Enter a new {purpose}: ")

            if password_1 == GENERATE:
                pwd_bit_strength, password_1 = MasterKey.generate_master_password()

                m_print([f"Generated a {pwd_bit_strength}-bit password:",
                         '', password_1, '',
                         "Write down this password and dispose of the copy once you remember it.",
                         "Press <Enter> to continue."], manual_proceed=True, box=True, head=1, tail=1)
                reset_terminal()

                password_2 = password_1
            else:
                password_2 = pwd_prompt(f"Confirm the {purpose}: ", repeat=True)

            if password_1 == password_2:
                return password_1

            m_print("Error: Passwords did not match. Try again.", head=1, tail=1)
            print_on_previous_line(delay=1, reps=7)

    @classmethod
    def get_password(cls, purpose: str = "master password") -> str:
        """Prompt the user to enter a password."""
        return pwd_prompt(f"Enter {purpose}: ")

    def authenticate_action(self) -> bool:
        """Return True if user entered correct master password to authenticate an action.

        Raises SoftError if the user aborts the password prompt with
        EOF or Ctrl-C.
        """
        try:
            authenticated = self.load_master_key() == self.master_key
        except (EOFError, KeyboardInterrupt):
            raise SoftError("Authentication aborted.", tail_clear=True, head=2, delay=1)

        return authenticated