From da0e1aa69defa7cbc87209966c751918f523f1fb Mon Sep 17 00:00:00 2001 From: Piotr Szarmanski Date: Sat, 5 Aug 2023 22:57:30 +0200 Subject: Encoder refactor, new tests and proper non-file stream handling --- src/backend.lisp | 15 ++++--- src/eris.lisp | 126 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 107 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/backend.lisp b/src/backend.lisp index 8d1405b..d2b81b2 100644 --- a/src/backend.lisp +++ b/src/backend.lisp @@ -27,11 +27,14 @@ "Using the BACKEND, return a stream that decodes the provided READ-CAPABILITY object.")) -(defgeneric store-data (input backend &key secret &allow-other-keys) +(defgeneric store-data (input backend &key secret block-size &allow-other-keys) (:documentation "Using the BACKEND, store the INPUT, which is either a stream or an octet vector. An additional 32-byte octet-vector SECRET can be provided in order to -protect the data from attacks against convergent encryption.")) +protect the data from attacks against convergent encryption. + +BLOCK-SIZE defaults to 32kib, except when the input is a file or vector of at +most 16kib, in which case 1kib is used. It should be either 1kib or 32kib.")) ;; Default methods @@ -41,13 +44,15 @@ protect the data from attacks against convergent encryption.")) (with-slots (fetch-function) backend (eris-decode read-capability fetch-function))) -(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) &allow-other-keys) +(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) (block-size 32kib) &allow-other-keys) (declare (type octet-vector secret)) (with-slots (output-function) backend (eris-encode input ;; According to ERIS spec recommendation.
(if (> (etypecase input - (stream (file-length input)) - (vector (length input))) + (pathname (file-size input)) + (file-stream (file-length input)) + (vector (length input)) + (t block-size)) 16384) 32kib 1kib) diff --git a/src/eris.lisp b/src/eris.lisp index 7efdc73..196bcce 100644 --- a/src/eris.lisp +++ b/src/eris.lisp @@ -133,26 +133,76 @@ versioning bytes are not supported by eris-cl." (declare (type string urn)) (base32-to-bytes-unpadded (subseq urn (1+ (position #\: urn :from-end t))))) +;; This macro assumes that there are variables BLOCK, SECRET and OUTPUT-FUNCTION +;; in the lexenv. -(defun pad (input block-size) - (declare (type octet-vector input) - (type integer block-size)) - (let* ((pad-size (- block-size (mod (length input) block-size))) - (padded-input (adjust-array input (+ pad-size (length input)) :initial-element 0))) - (replace padded-input input) - (setf (aref padded-input (length input)) #x80) - padded-input)) - -(defmacro output-block (ref-vector) +(defmacro output-block (rks i) `(let ((rk (encrypt-block block secret))) - (vector-push-extend rk ,ref-vector) + (setf (svref ,rks ,i) rk) (funcall output-function block (subseq rk 0 32)))) + (defmacro output-internal-block (ref-vector nonce) `(let ((rk (encrypt-internal-block block ,nonce))) (vector-push-extend rk ,ref-vector) (funcall output-function block (subseq rk 0 32)))) + +;; These CHUNK- functions are written in order to allow processing files in +;; parallel. + +(defun chunk-array (array block-size output-function secret &key pad) + "Split a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)) whose length is a multiple of BLOCK-SIZE +into chunks, output them and collect references. Returns a vector of references. + +Pass PAD as T if the output should be padded."
+ (declare (type block-size block-size) + (type octet-vector array)) + (let ((blocks (if pad + (/ (+ (length array) (- block-size (mod (length array) block-size))) block-size) + (/ (length array) block-size)))) + (let ((block (make-octet-vector block-size)) + (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret))) + (loop for i from 0 below (1- blocks) + do (progn + (replace block array :start2 (* block-size i)) + (setf block (output-block rks i)))) + ;; handle last block + (replace block array :start2 (* block-size (1- blocks))) + (when pad + (setf (aref block (mod (length array) block-size)) #x80) + (fill block 0 :start (1+ (mod (length array) block-size)))) + (output-block rks (1- blocks)) + rks))) + + +;; Implementation note: This is a copy of CHUNK-ARRAY with (LENGTH ARRAY) +;; changed to LENGTH and REPLACE changed to READ-SEQUENCE. It is, however, more +;; memory-efficient than reading a file into an array and then chunking it. + +(defun chunk-stream (stream block-size output-function length secret &key pad) + "Like CHUNK-ARRAY, but with streams. LENGTH indicates the number of bytes to +read and should be a multiple of BLOCK-SIZE unless PAD is T."
+ (declare (type block-size block-size) + (type integer length)) + (let ((blocks (if pad + (/ (+ length (- block-size (mod length block-size))) block-size) + (/ length block-size)))) + (let ((block (make-octet-vector block-size)) + ;; initialize with null-secret to please SBCL + (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret))) + (loop for i from 0 below (1- blocks) + do (progn + (read-sequence block stream ) + (setf block (output-block rks i)))) + ;; handle last block + (read-sequence block stream) + (when pad + (setf (aref block (mod length block-size)) #x80) + (fill block 0 :start (1+ (mod length block-size)))) + (output-block rks (1- blocks)) + rks))) + (defgeneric eris-encode (input block-size output-function &key secret) (:documentation "Encode an INPUT into BLOCK-SIZE (32kib or 1kib) blocks, that are output using @@ -161,30 +211,40 @@ encoded block and a 32-byte reference octet vector, and it MUST return a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)) of equal size to the one given, which will be destructively modified. Returns a read-capability object. 
-An optional 32-byte secret can be passed for additional encryption using the -SECRET keyword argument.")) +A SECRET can be provided to use with encryption; otherwise the null secret (* 32 0x0) +is used.")) (defmethod eris-encode ((input vector) block-size output-function &key (secret null-secret)) (declare (type block-size block-size) (type function output-function) (type (octet-vector 32) secret)) + (eris-create-tree + (chunk-array input block-size output-function secret :pad t) + block-size output-function)) + +(defmethod eris-encode ((input pathname) block-size output-function &key (secret null-secret)) + (declare (type block-size block-size) + (type function output-function) + (type (octet-vector 32) secret)) + (with-open-file (f input :element-type 'octet) + (eris-create-tree + (chunk-stream f block-size output-function (file-length f) secret :pad t) + block-size output-function))) - (setf input (pad input block-size)) - - (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) - (block (make-array block-size :element-type 'octet :initial-element 0))) - (declare (type octet-vector block)) - (loop for i = 0 then (incf i) - until (= (length input) (* i block-size)) - do (progn (replace block input :start2 (* i block-size)) - (setf block (output-block reference-vector)) - (fill block 0))) - ;; always bzero the buffer; this is unoptimal (it only needs to be zeroed out to eliminate trailing junk) - ;; TODO: consider removing this entire function and replacing it with an octet stream - (eris-create-tree reference-vector block-size output-function))) +(defmethod eris-encode ((input file-stream) block-size output-function &key (secret null-secret)) + (declare (type block-size block-size) + (type function output-function) + (type (octet-vector 32) secret)) + (eris-create-tree + (chunk-stream input block-size output-function + (- (file-length input) (file-position input)) + secret :pad t) + block-size output-function)) + +;; This is the odd one out 
because it is not possible to determine the length of +;; a non-file stream (modulo broadcast and synonym streams). (defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret)) - "This method does not handle any IO related conditions." (declare (type block-size block-size) (type function output-function) (type (octet-vector 32) secret)) @@ -195,8 +255,10 @@ SECRET keyword argument.")) for i = 0 then (incf i) if (< bytes-read block-size) do (progn (setf (aref block bytes-read) #x80) - (fill block 0 :start (1+ bytes-read))) ;; bzero the buffer here to eliminate trailing junk - do (progn (setf block (output-block reference-vector))) + (fill block 0 :start (1+ bytes-read))) + do (progn (setf block (let ((rk (encrypt-block block secret))) + (vector-push-extend rk reference-vector) + (funcall output-function block (subseq rk 0 32))))) until (< bytes-read block-size)) (eris-create-tree reference-vector block-size output-function))) @@ -231,3 +293,9 @@ SECRET keyword argument.")) (output-internal-block reference-vector-l nonce))) (setf reference-vector reference-vector-l) (setf reference-vector-l (make-array 16 :adjustable t :fill-pointer 0))))) + + + + + + -- cgit v1.2.3