summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPiotr Szarmanski2023-08-05 22:57:30 +0200
committerPiotr Szarmanski2023-08-05 22:57:30 +0200
commitda0e1aa69defa7cbc87209966c751918f523f1fb (patch)
treeba1796f8500d314d93e46874e910492bf83ff4a2 /src
parenta5e2232edc0415dc16643aaeaafe91bdb1d18b59 (diff)
Encoder refactor, new tests and proper non-file stream handling
Diffstat (limited to 'src')
-rw-r--r--src/backend.lisp15
-rw-r--r--src/eris.lisp126
2 files changed, 107 insertions, 34 deletions
diff --git a/src/backend.lisp b/src/backend.lisp
index 8d1405b..d2b81b2 100644
--- a/src/backend.lisp
+++ b/src/backend.lisp
@@ -27,11 +27,14 @@
"Using the BACKEND, return a stream that decodes the provided READ-CAPABILITY
object."))
-(defgeneric store-data (input backend &key secret &allow-other-keys)
+(defgeneric store-data (input backend &key secret block-size &allow-other-keys)
(:documentation
"Using the BACKEND, store the INPUT, which is either a stream or an octet
vector. An additional 32-byte octet-vector SECRET can be provided in order to
-protect the data from attacks against convergent encryption."))
+protect the data from attacks against convergent encryption.
+
+BLOCK-SIZE is by default 32kib, except if the input is a file or vector with a
+size less than 16kib. It should be set either to 1kib (1024 bytes) or 32kib."))
;; Default methods
@@ -41,13 +44,15 @@ protect the data from attacks against convergent encryption."))
(with-slots (fetch-function) backend
(eris-decode read-capability fetch-function)))
-(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) &allow-other-keys)
+(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) (block-size 32kib) &allow-other-keys)
(declare (type octet-vector secret))
(with-slots (output-function) backend
(eris-encode input ;; According to ERIS spec recommendation.
(if (> (etypecase input
- (stream (file-length input))
- (vector (length input)))
+ (pathname (file-size input))
+ (file-stream (file-length input))
+ (vector (length input))
+ (t block-size))
16384)
32kib
1kib)
diff --git a/src/eris.lisp b/src/eris.lisp
index 7efdc73..196bcce 100644
--- a/src/eris.lisp
+++ b/src/eris.lisp
@@ -133,26 +133,76 @@ versioning bytes are not supported by eris-cl."
(declare (type string urn))
(base32-to-bytes-unpadded (subseq urn (1+ (position #\: urn :from-end t)))))
+;; This macro assumes that there are variables BLOCK, SECRET and OUTPUT-FUNCTION
+;; in the lexenv.
-(defun pad (input block-size)
- (declare (type octet-vector input)
- (type integer block-size))
- (let* ((pad-size (- block-size (mod (length input) block-size)))
- (padded-input (adjust-array input (+ pad-size (length input)) :initial-element 0)))
- (replace padded-input input)
- (setf (aref padded-input (length input)) #x80)
- padded-input))
-
-(defmacro output-block (ref-vector)
+(defmacro output-block (rks i)
`(let ((rk (encrypt-block block secret)))
- (vector-push-extend rk ,ref-vector)
+ (setf (svref ,rks ,i) rk)
(funcall output-function block (subseq rk 0 32))))
+
;; Like OUTPUT-BLOCK, this macro is intentionally unhygienic: its expansion
;; refers to the variables BLOCK and OUTPUT-FUNCTION, which must be bound at the
;; call site. It calls ENCRYPT-INTERNAL-BLOCK on BLOCK with NONCE, appends the
;; result to REF-VECTOR and passes BLOCK together with the first 32 elements of
;; the result to OUTPUT-FUNCTION.
(defmacro output-internal-block (ref-vector nonce)
`(let ((rk (encrypt-internal-block block ,nonce))) ; RK is visible to the NONCE and REF-VECTOR forms
(vector-push-extend rk ,ref-vector) ; REF-VECTOR must have a fill pointer
(funcall output-function block (subseq rk 0 32)))) ; the expansion evaluates to OUTPUT-FUNCTION's return value
+
+;; These CHUNK- functions are written in order to allow processing files in
+;; parallel.
+
+(defun chunk-array (array block-size output-function secret &key pad)
+  "Split ARRAY, a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)), into chunks of BLOCK-SIZE
+octets, output them through OUTPUT-FUNCTION and collect references. Returns a
+vector of references, one per block, in input order.
+
+Unless PAD is T, the length of ARRAY must be a multiple of BLOCK-SIZE; an error
+is signalled otherwise, and an empty ARRAY yields an empty vector.
+
+Pass PAD as T if the output should be padded: #x80 is written after the data
+and the rest of the last block is zero-filled. Padding always produces one
+block more than the number of full blocks in ARRAY."
+  (declare (type block-size block-size)
+           (type octet-vector array))
+  (multiple-value-bind (full-blocks tail) (floor (length array) block-size)
+    (unless (or pad (zerop tail))
+      (error "CHUNK-ARRAY: array length ~D is not a multiple of BLOCK-SIZE ~D and PAD is NIL."
+             (length array) block-size))
+    (let* ((blocks (if pad (1+ full-blocks) full-blocks))
+           ;; The OUTPUT-BLOCK macro refers to BLOCK, SECRET and OUTPUT-FUNCTION
+           ;; by name, so these variables must keep exactly these names.
+           (block (make-octet-vector block-size))
+           (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret)))
+      (dotimes (i blocks rks)
+        (replace block array :start2 (* block-size i))
+        ;; Index FULL-BLOCKS is only reached when PAD is true: it is the
+        ;; trailing block that holds the TAIL leftover octets plus padding.
+        (when (= i full-blocks)
+          (setf (aref block tail) #x80)
+          (fill block 0 :start (1+ tail)))
+        ;; OUTPUT-FUNCTION hands back the buffer to fill for the next block.
+        (setf block (output-block rks i))))))
+
+
+;; Implementation note: This is a copy of CHUNK-ARRAY with (LENGTH ARRAY)
+;; replaced by LENGTH and REPLACE replaced by READ-SEQUENCE. It is, however, more
+;; memory-efficient than reading a file into an array and then chunking it.
+
+(defun chunk-stream (stream block-size output-function length secret &key pad)
+  "Like CHUNK-ARRAY, but with streams. LENGTH indicates the amount of bytes to
+read and should be a multiple of BLOCK-SIZE unless PAD is T; an error is
+signalled otherwise.
+
+Exactly LENGTH bytes are consumed from STREAM. If STREAM ends before LENGTH
+bytes have been read, END-OF-FILE is signalled instead of encoding stale buffer
+contents. Returns a vector of references, one per block, in input order."
+  (declare (type block-size block-size)
+           (type integer length))
+  (multiple-value-bind (full-blocks tail) (floor length block-size)
+    (unless (or pad (zerop tail))
+      (error "CHUNK-STREAM: LENGTH ~D is not a multiple of BLOCK-SIZE ~D and PAD is NIL."
+             length block-size))
+    (let* ((blocks (if pad (1+ full-blocks) full-blocks))
+           ;; The OUTPUT-BLOCK macro refers to BLOCK, SECRET and OUTPUT-FUNCTION
+           ;; by name, so these variables must keep exactly these names.
+           (block (make-octet-vector block-size))
+           ;; initialize with null-secret to please SBCL
+           (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret)))
+      (dotimes (i blocks rks)
+        ;; Index FULL-BLOCKS is only reached when PAD is true: it is the
+        ;; trailing block, for which only the TAIL leftover bytes are read so
+        ;; that nothing beyond LENGTH is consumed from STREAM.
+        (let* ((last-padded-p (= i full-blocks))
+               (wanted (if last-padded-p tail block-size))
+               (got (read-sequence block stream :end wanted)))
+          (unless (= got wanted)
+            (error 'end-of-file :stream stream))
+          (when last-padded-p
+            (setf (aref block tail) #x80)
+            (fill block 0 :start (1+ tail))))
+        ;; OUTPUT-FUNCTION hands back the buffer to fill for the next block.
+        (setf block (output-block rks i))))))
+
(defgeneric eris-encode (input block-size output-function &key secret)
(:documentation
"Encode an INPUT into BLOCK-SIZE (32kib or 1kib) blocks, that are output using
@@ -161,30 +211,40 @@ encoded block and a 32-byte reference octet vector, and it MUST return
a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)) of equal size to the one given, which will be
destructively modified. Returns a read-capability object.
-An optional 32-byte secret can be passed for additional encryption using the
-SECRET keyword argument."))
+A SECRET can be provided to use with encryption; otherwise the null secret (* 32 0x0)
+is used."))
;; Vector input: the whole content is already in memory, so it is chunked
;; directly with CHUNK-ARRAY and the resulting references are passed on to
;; ERIS-CREATE-TREE.
(defmethod eris-encode ((input vector) block-size output-function &key (secret null-secret))
(declare (type block-size block-size)
(type function output-function)
(type (octet-vector 32) secret))
+ (eris-create-tree
+ (chunk-array input block-size output-function secret :pad t) ; :PAD T pads the final block
+ block-size output-function))
+
+(defmethod eris-encode ((input pathname) block-size output-function &key (secret null-secret))
+  "Encode the file named by INPUT. The file is opened as an octet stream, its
+whole length is chunked with padding by CHUNK-STREAM, and the resulting
+references are passed to ERIS-CREATE-TREE."
+  (declare (type block-size block-size)
+           (type function output-function)
+           (type (octet-vector 32) secret))
+  (with-open-file (in input :element-type 'octet)
+    (let* ((size (file-length in))
+           (references (chunk-stream in block-size output-function size secret :pad t)))
+      (eris-create-tree references block-size output-function))))
- (setf input (pad input block-size))
-
- (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0))
- (block (make-array block-size :element-type 'octet :initial-element 0)))
- (declare (type octet-vector block))
- (loop for i = 0 then (incf i)
- until (= (length input) (* i block-size))
- do (progn (replace block input :start2 (* i block-size))
- (setf block (output-block reference-vector))
- (fill block 0)))
- ;; always bzero the buffer; this is unoptimal (it only needs to be zeroed out to eliminate trailing junk)
- ;; TODO: consider removing this entire function and replacing it with an octet stream
- (eris-create-tree reference-vector block-size output-function)))
+(defmethod eris-encode ((input file-stream) block-size output-function &key (secret null-secret))
+  "Encode the rest of the file stream INPUT, from its current file position up
+to the end of the file. The content is chunked with padding by CHUNK-STREAM and
+the resulting references are passed to ERIS-CREATE-TREE."
+  (declare (type block-size block-size)
+           (type function output-function)
+           (type (octet-vector 32) secret))
+  (let* ((remaining (- (file-length input) (file-position input)))
+         (references (chunk-stream input block-size output-function remaining secret :pad t)))
+    (eris-create-tree references block-size output-function)))
+
+;; This is the odd one out because it is not possible to determine the length of
+;; a non-file stream (modulo broadcast and synonym streams).
(defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret))
- "This method does not handle any IO related conditions."
(declare (type block-size block-size)
(type function output-function)
(type (octet-vector 32) secret))
@@ -195,8 +255,10 @@ SECRET keyword argument."))
for i = 0 then (incf i)
if (< bytes-read block-size)
do (progn (setf (aref block bytes-read) #x80)
- (fill block 0 :start (1+ bytes-read))) ;; bzero the buffer here to eliminate trailing junk
- do (progn (setf block (output-block reference-vector)))
+ (fill block 0 :start (1+ bytes-read)))
+ do (progn (setf block (let ((rk (encrypt-block block secret)))
+ (vector-push-extend rk reference-vector)
+ (funcall output-function block (subseq rk 0 32)))))
until (< bytes-read block-size))
(eris-create-tree reference-vector block-size output-function)))
@@ -231,3 +293,9 @@ SECRET keyword argument."))
(output-internal-block reference-vector-l nonce)))
(setf reference-vector reference-vector-l)
(setf reference-vector-l (make-array 16 :adjustable t :fill-pointer 0)))))
+
+
+
+
+
+