summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPiotr Szarmanski2023-08-05 22:57:30 +0200
committerPiotr Szarmanski2023-08-05 22:57:30 +0200
commitda0e1aa69defa7cbc87209966c751918f523f1fb (patch)
treeba1796f8500d314d93e46874e910492bf83ff4a2
parenta5e2232edc0415dc16643aaeaafe91bdb1d18b59 (diff)
Encoder refactor, new tests and proper non-file stream handling
-rw-r--r--README13
-rw-r--r--src/backend.lisp15
-rw-r--r--src/eris.lisp126
-rw-r--r--tests/backend-tests.lisp9
-rw-r--r--tests/common.lisp15
-rw-r--r--tests/decode-tests.lisp18
-rw-r--r--tests/encode-tests.lisp128
7 files changed, 250 insertions, 74 deletions
diff --git a/README b/README
index b428895..53d4c6d 100644
--- a/README
+++ b/README
@@ -24,19 +24,13 @@ block-urn-to-reference
The eris-encode (INPUT BLOCK-SIZE OUTPUT-FUNCTION &KEY SECRET HASH-OUTPUT)
-function can be used to encode a vector or a stream into an ERIS
-read-capability.
+function can be used to encode a vector, stream or pathname into an ERIS
+read-capability.
The eris-decode (READ-CAPABILITY FETCH-FUNCTION &KEY (CACHE-CAPACITY 2048))
function can be used to decode an ERIS read-capability. It returns a stream of
the class ERIS-DECODE-STREAM: this class implements the Gray streams protocol.
-In addition, on POSIX systems, eris-decode-parallel (READ-CAPABILITY
-FETCH-FUNCTION OUTPUT-FILE &KEY (CACHE-CAPACITY 4096) (THREADS 4)
-(INITIAL-BINDINGS *DEFAULT-SPECIAL-BINDINGS*)) function is available. This
-function will attempt to decode an ERIS read-capability in parallel into a file
-specified by the OUTPUT-FILE string or pathspec.
-
See the docstrings of the specific functions for more details. However, you
should only use these to write custom backends; otherwise, see below.
@@ -48,8 +42,7 @@ fetch-function, caching details, block-size, etc. and the {en/de}coding
functions simply take the backend as an argument.
This interface consists of two generic functions: store-data, for encoding data,
-and fetch-read-capability, for retrieving the contents of a read-capability
-object.
+and fetch-data, for retrieving the contents from a read-capability object.
As an example, a file-based backend called file-backend is provided. It can be
used simply by making an instance of the 'file-backend class with a :directory
diff --git a/src/backend.lisp b/src/backend.lisp
index 8d1405b..d2b81b2 100644
--- a/src/backend.lisp
+++ b/src/backend.lisp
@@ -27,11 +27,14 @@
"Using the BACKEND, return a stream that decodes the provided READ-CAPABILITY
object."))
-(defgeneric store-data (input backend &key secret &allow-other-keys)
+(defgeneric store-data (input backend &key secret block-size &allow-other-keys)
(:documentation
"Using the BACKEND, store the INPUT, which is either a stream or an octet
vector. An additional 32-byte octet-vector SECRET can be provided in order to
-protect the data from attacks against convergent encryption."))
+protect the data from attacks against convergent encryption.
+
+BLOCK-SIZE is by default 32kib, except if the input is a file or vector with a
+size less than 16kib. It should be set either to 1024b or 32kib."))
;; Default methods
@@ -41,13 +44,15 @@ protect the data from attacks against convergent encryption."))
(with-slots (fetch-function) backend
(eris-decode read-capability fetch-function)))
-(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) &allow-other-keys)
+(defmethod store-data (input (backend encoding-backend) &key (secret null-secret) (block-size 32kib) &allow-other-keys)
(declare (type octet-vector secret))
(with-slots (output-function) backend
(eris-encode input ;; According to ERIS spec recommendation.
(if (> (etypecase input
- (stream (file-length input))
- (vector (length input)))
+ (pathname (file-size input))
+ (file-stream (file-length input))
+ (vector (length input))
+ (t block-size))
16384)
32kib
1kib)
diff --git a/src/eris.lisp b/src/eris.lisp
index 7efdc73..196bcce 100644
--- a/src/eris.lisp
+++ b/src/eris.lisp
@@ -133,26 +133,76 @@ versioning bytes are not supported by eris-cl."
(declare (type string urn))
(base32-to-bytes-unpadded (subseq urn (1+ (position #\: urn :from-end t)))))
+;; This macro assumes that there are variables BLOCK, SECRET and OUTPUT-FUNCTION
+;; in the lexenv.
-(defun pad (input block-size)
- (declare (type octet-vector input)
- (type integer block-size))
- (let* ((pad-size (- block-size (mod (length input) block-size)))
- (padded-input (adjust-array input (+ pad-size (length input)) :initial-element 0)))
- (replace padded-input input)
- (setf (aref padded-input (length input)) #x80)
- padded-input))
-
-(defmacro output-block (ref-vector)
+(defmacro output-block (rks i)
`(let ((rk (encrypt-block block secret)))
- (vector-push-extend rk ,ref-vector)
+ (setf (svref ,rks ,i) rk)
(funcall output-function block (subseq rk 0 32))))
+
(defmacro output-internal-block (ref-vector nonce)
`(let ((rk (encrypt-internal-block block ,nonce)))
(vector-push-extend rk ,ref-vector)
(funcall output-function block (subseq rk 0 32))))
+
+;; These CHUNK- functions are written in order to allow processing files in
+;; parallel.
+
+(defun chunk-array (array block-size output-function secret &key pad)
+  "Split ARRAY, a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)), into BLOCK-SIZE chunks, output
+each chunk with OUTPUT-BLOCK and collect the references. Returns a vector with
+one entry per block.
+
+If PAD is true, the last block is padded: a #x80 octet is placed directly after
+the data and the rest of the block is zeroed. When the length of ARRAY is
+already a multiple of BLOCK-SIZE this adds one block that holds only padding.
+
+If PAD is false, the length of ARRAY must be a multiple of BLOCK-SIZE. An empty
+ARRAY then produces no blocks and an empty vector is returned."
+  (declare (type block-size block-size)
+           (type octet-vector array))
+  (let* ((len (length array))
+         ;; Number of data octets that land in the final, partial block.
+         (tail (mod len block-size))
+         (blocks (if pad
+                     (/ (+ len (- block-size tail)) block-size)
+                     (/ len block-size)))
+         ;; OUTPUT-BLOCK refers to BLOCK, SECRET and OUTPUT-FUNCTION by name.
+         (block (make-octet-vector block-size))
+         (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret)))
+    ;; Without padding an empty ARRAY yields zero blocks; there is no last
+    ;; block to handle in that case.
+    (when (plusp blocks)
+      (loop for i from 0 below (1- blocks)
+            do (replace block array :start2 (* block-size i))
+               ;; OUTPUT-FUNCTION hands back the buffer to use next.
+               (setf block (output-block rks i)))
+      ;; handle last block
+      (replace block array :start2 (* block-size (1- blocks)))
+      (when pad
+        (setf (aref block tail) #x80)
+        (fill block 0 :start (1+ tail)))
+      (output-block rks (1- blocks)))
+    rks))
+
+
+;; Implementation note: this is a copy of CHUNK-ARRAY in which (LENGTH ARRAY)
+;; became LENGTH and REPLACE became READ-SEQUENCE. It is, however, more
+;; memory-efficient than reading a file into an array and then chunking it.
+
+(defun chunk-stream (stream block-size output-function length secret &key pad)
+  "Like CHUNK-ARRAY, but reads the octets from STREAM. LENGTH indicates the amount
+of bytes to read and should be a multiple of BLOCK-SIZE unless PAD is T.
+
+With PAD, only the (MOD LENGTH BLOCK-SIZE) remaining data octets are read for
+the last block before the #x80 marker and the zero fill are written, so no more
+than LENGTH bytes are consumed from STREAM. Without PAD, a LENGTH of zero
+produces no blocks and an empty vector is returned."
+  (declare (type block-size block-size)
+           (type integer length))
+  (let* ((tail (mod length block-size))
+         (blocks (if pad
+                     (/ (+ length (- block-size tail)) block-size)
+                     (/ length block-size)))
+         ;; OUTPUT-BLOCK refers to BLOCK, SECRET and OUTPUT-FUNCTION by name.
+         (block (make-octet-vector block-size))
+         ;; initialize with null-secret to please SBCL
+         (rks (make-array blocks :element-type 'octet-vector :initial-element null-secret)))
+    (when (plusp blocks)
+      (loop for i from 0 below (1- blocks)
+            do (read-sequence block stream)
+               ;; OUTPUT-FUNCTION hands back the buffer to use next.
+               (setf block (output-block rks i)))
+      ;; handle last block
+      (cond (pad
+             ;; Stop at TAIL so the stream is not read past LENGTH.
+             (read-sequence block stream :end tail)
+             (setf (aref block tail) #x80)
+             (fill block 0 :start (1+ tail)))
+            (t
+             (read-sequence block stream)))
+      (output-block rks (1- blocks)))
+    rks))
+
(defgeneric eris-encode (input block-size output-function &key secret)
(:documentation
"Encode an INPUT into BLOCK-SIZE (32kib or 1kib) blocks, that are output using
@@ -161,30 +211,40 @@ encoded block and a 32-byte reference octet vector, and it MUST return
a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)) of equal size to the one given, which will be
destructively modified. Returns a read-capability object.
-An optional 32-byte secret can be passed for additional encryption using the
-SECRET keyword argument."))
+A SECRET can be provided to use with encryption; otherwise the null secret (* 32 0x0)
+is used."))
(defmethod eris-encode ((input vector) block-size output-function &key (secret null-secret))
(declare (type block-size block-size)
(type function output-function)
(type (octet-vector 32) secret))
+ (eris-create-tree
+ (chunk-array input block-size output-function secret :pad t)
+ block-size output-function))
+
+(defmethod eris-encode ((input pathname) block-size output-function &key (secret null-secret))
+  ;; Open the file named by INPUT as an octet stream, chunk its full length
+  ;; with padding, then build the tree from the collected references.
+  (declare (type block-size block-size)
+           (type function output-function)
+           (type (octet-vector 32) secret))
+  (with-open-file (in input :element-type 'octet)
+    (let* ((size (file-length in))
+           (references (chunk-stream in block-size output-function size secret :pad t)))
+      (eris-create-tree references block-size output-function))))
- (setf input (pad input block-size))
-
- (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0))
- (block (make-array block-size :element-type 'octet :initial-element 0)))
- (declare (type octet-vector block))
- (loop for i = 0 then (incf i)
- until (= (length input) (* i block-size))
- do (progn (replace block input :start2 (* i block-size))
- (setf block (output-block reference-vector))
- (fill block 0)))
- ;; always bzero the buffer; this is unoptimal (it only needs to be zeroed out to eliminate trailing junk)
- ;; TODO: consider removing this entire function and replacing it with an octet stream
- (eris-create-tree reference-vector block-size output-function)))
+(defmethod eris-encode ((input file-stream) block-size output-function &key (secret null-secret))
+  ;; Encode from the current file position to the end of the file: the byte
+  ;; count handed to CHUNK-STREAM is the file length minus the position.
+  (declare (type block-size block-size)
+           (type function output-function)
+           (type (octet-vector 32) secret))
+  (let* ((remaining (- (file-length input) (file-position input)))
+         (references (chunk-stream input block-size output-function remaining secret :pad t)))
+    (eris-create-tree references block-size output-function)))
+
+;; This is the odd one out because it is not possible to determine the length of
+;; a non-file stream (modulo broadcast and synonym streams).
(defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret))
- "This method does not handle any IO related conditions."
(declare (type block-size block-size)
(type function output-function)
(type (octet-vector 32) secret))
@@ -195,8 +255,10 @@ SECRET keyword argument."))
for i = 0 then (incf i)
if (< bytes-read block-size)
do (progn (setf (aref block bytes-read) #x80)
- (fill block 0 :start (1+ bytes-read))) ;; bzero the buffer here to eliminate trailing junk
- do (progn (setf block (output-block reference-vector)))
+ (fill block 0 :start (1+ bytes-read)))
+ do (progn (setf block (let ((rk (encrypt-block block secret)))
+ (vector-push-extend rk reference-vector)
+ (funcall output-function block (subseq rk 0 32)))))
until (< bytes-read block-size))
(eris-create-tree reference-vector block-size output-function)))
@@ -231,3 +293,9 @@ SECRET keyword argument."))
(output-internal-block reference-vector-l nonce)))
(setf reference-vector reference-vector-l)
(setf reference-vector-l (make-array 16 :adjustable t :fill-pointer 0)))))
+
+
+
+
+
+
diff --git a/tests/backend-tests.lisp b/tests/backend-tests.lisp
index dc411d5..0f60267 100644
--- a/tests/backend-tests.lisp
+++ b/tests/backend-tests.lisp
@@ -34,15 +34,6 @@
(test-hash-backend (make-octets 16834 :element 5) 32kib)
(test-hash-backend (make-octets 96000 :element 5) 32kib))
-(defun make-temporary-dir ()
- (let* ((tmpdir (uiop:temporary-directory))
- (tmp-tmpdir (make-pathname :directory (serapeum:append1
- (pathname-directory tmpdir)
- (ironclad:byte-array-to-hex-string (ironclad:random-data 10)))
- :defaults tmpdir)))
- (ensure-directories-exist tmp-tmpdir)
- tmp-tmpdir))
-
(defmacro test-file-backend (array &optional (secret null-secret))
`(let ((tmpdir (make-temporary-dir)))
(unwind-protect
diff --git a/tests/common.lisp b/tests/common.lisp
index 99a85c9..7a9309f 100644
--- a/tests/common.lisp
+++ b/tests/common.lisp
@@ -21,3 +21,18 @@
(defmacro make-octet-array-with-loop (loop)
`(let ((seq ,loop))
(make-array (length seq) :element-type '(unsigned-byte 8) :initial-contents seq) ))
+
+(defun make-temporary-dir ()
+  "Create a new directory inside (UIOP:TEMPORARY-DIRECTORY), named with the hex
+encoding of 10 random bytes, and return its pathname."
+  (let* ((base (uiop:temporary-directory))
+         (name (ironclad:byte-array-to-hex-string (ironclad:random-data 10)))
+         (fresh (make-pathname :directory (append (pathname-directory base) (list name))
+                               :defaults base)))
+    (ensure-directories-exist fresh)
+    fresh))
+
+(defmacro with-temporary-dir (sym &body expr)
+ "Evaluate EXPR with SYM bound to the result of (MAKE-TEMPORARY-DIR). The
+directory tree is deleted when control leaves EXPR, whether normally or through
+a non-local exit."
+ `(let ((,sym (make-temporary-dir)))
+ (unwind-protect
+ (progn ,@expr)
+ (uiop:delete-directory-tree ,sym :validate t))))
diff --git a/tests/decode-tests.lisp b/tests/decode-tests.lisp
index 5053d11..918cd82 100644
--- a/tests/decode-tests.lisp
+++ b/tests/decode-tests.lisp
@@ -96,7 +96,9 @@
(assert-array-decode (make-octets 16385 :element 8) 1024)
(assert-array-decode (make-octets 32767 :element 9) 1024)
(assert-array-decode (make-octets 32768 :element 10) 1024)
- (assert-array-decode (make-octets 131072 :element 11) 1024))
+ (assert-array-decode (make-octets 131072 :element 11) 1024)
+ (for-all ((buffer (gen-buffer :length (gen-integer :min 0 :max 40000))))
+ (assert-array-decode buffer 1024)))
(test simple-decoding-32kib
(assert-array-decode (make-octets 1 :element 2) 32kib)
@@ -104,7 +106,9 @@
(assert-array-decode (make-octets 32768 :element 2) 32kib)
(assert-array-decode (make-octets 32769 :element 2) 32kib)
(assert-array-decode (make-octets 32768 :element 2) 32kib)
- (assert-array-decode (make-octets 16777216 :element 2) 32kib))
+ (assert-array-decode (make-octets 16777216 :element 2) 32kib)
+ (for-all ((buffer (gen-buffer :length (gen-integer :min 0 :max 70000))))
+ (assert-array-decode buffer 32kib)))
(test proper-return-values
(assert-bytes-read (make-octets 1 :element 3) 1024 (1))
@@ -127,12 +131,6 @@
(stream (eris-decode read-capability #'hashtable-decode)))
(setf (stream-file-position stream) ,pos)
(stream-read-sequence stream buf 0 (length buf))
- ;; (print (pos (buffer stream)))
- ;; (print (+ 24 ,buffer-pos))
- ;; (print (pos stream))
- ;; (print (+ 24 ,pos))
- ;; (print buf)
- ;; (print ,array-at-pos)
(is (and
(eql (eris::pos (eris::buffer stream))
(+ 24 ,buffer-pos))
@@ -200,7 +198,9 @@
(assert-length (make-array 1024 :element-type '(unsigned-byte 8) :initial-element 2) 1024)
(assert-length (make-array 2048 :element-type '(unsigned-byte 8) :initial-element 2) 1024)
(assert-length (make-array 16383 :element-type '(unsigned-byte 8) :initial-element 2) 1024)
- (assert-length (make-array 16384 :element-type '(unsigned-byte 8) :initial-element 2) 1024))
+ (assert-length (make-array 16384 :element-type '(unsigned-byte 8) :initial-element 2) 1024)
+ (for-all ((buffer (gen-buffer :length (gen-integer :min 0 :max 40000))))
+ (assert-length buffer 1024)))
(defmacro assert-read-byte (array block-size)
diff --git a/tests/encode-tests.lisp b/tests/encode-tests.lisp
index abbeb0d..b4c6892 100644
--- a/tests/encode-tests.lisp
+++ b/tests/encode-tests.lisp
@@ -16,6 +16,7 @@
(in-package :eris/test)
(def-suite* encoding-tests :in eris-tests)
+
(defmacro check-urn (data block-size urn &key (secret null-secret))
`(let ((urn ,urn)
(vector-encode (read-capability-to-urn
@@ -71,17 +72,120 @@
,urn)))))
(test 100MiB
- (large-content-test (make-array 24 :element-type '(unsigned-byte 8)
- :initial-contents
- #(49 48 48 77 105 66 32 40 98 108 111 99 107 32 115 105 122 101 32 49 75 105 66 41))
- 1024
- "urn:eris:BIC6F5EKY2PMXS2VNOKPD3AJGKTQBD3EXSCSLZIENXAXBM7PCTH2TCMF5OKJWAN36N4DFO6JPFZBR3MS7ECOGDYDERIJJ4N5KAQSZS67YY"
- 104857600))
+ (large-content-test
+ (make-array 24 :element-type '(unsigned-byte 8)
+ :initial-contents
+ #(49 48 48 77 105 66 32 40 98 108 111 99 107 32 115 105 122 101 32 49 75 105 66 41))
+ 1024
+ "urn:eris:BIC6F5EKY2PMXS2VNOKPD3AJGKTQBD3EXSCSLZIENXAXBM7PCTH2TCMF5OKJWAN36N4DFO6JPFZBR3MS7ECOGDYDERIJJ4N5KAQSZS67YY"
+ 104857600))
(test 1GiB
- (large-content-test (make-array 23 :element-type '(unsigned-byte 8)
- :initial-contents
- #(49 71 105 66 32 40 98 108 111 99 107 32 115 105 122 101 32 51 50 75 105 66 41))
- 32kib
- "urn:eris:B4BL4DKSEOPGMYS2CU2OFNYCH4BGQT774GXKGURLFO5FDXAQQPJGJ35AZR3PEK6CVCV74FVTAXHRSWLUUNYYA46ZPOPDOV2M5NVLBETWVI"
- 1073741824))
+ (large-content-test
+ (make-array 23 :element-type '(unsigned-byte 8)
+ :initial-contents
+ #(49 71 105 66 32 40 98 108 111 99 107 32 115 105 122 101 32 51 50 75 105 66 41))
+ 32kib
+ "urn:eris:B4BL4DKSEOPGMYS2CU2OFNYCH4BGQT774GXKGURLFO5FDXAQQPJGJ35AZR3PEK6CVCV74FVTAXHRSWLUUNYYA46ZPOPDOV2M5NVLBETWVI"
+ 1073741824))
+
+
+(defmacro encode-consensus-test (tmpdir data block-size &key (secret (random-data 32)))
+ "Test if all the eris-encode methods give the same results.
+
+DATA is encoded four times with BLOCK-SIZE and SECRET: from a pathname, from an
+open file stream, from the vector itself and from an octet input stream. The
+four resulting URNs must be equal. The files are created under TMPDIR with
+random names.
+
+The DATA form is spliced into the expansion several times, so it is evaluated
+more than once. The default SECRET is computed when the macro is expanded, so
+all four encodings in one expansion share the same secret."
+ `(let ((pathname-encode
+ ;; Write DATA to a fresh file, then encode via the pathname method.
+ (let ((pathname (merge-pathnames (crypto:byte-array-to-hex-string (crypto:random-data 16))
+ ,tmpdir)))
+ (with-open-file (f pathname
+ :direction :output
+ :element-type 'serapeum:octet
+ :if-does-not-exist :create)
+ (write-sequence ,data f))
+ (read-capability-to-urn
+ (eris-encode pathname
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret))))
+ (file-stream-encode
+ ;; Write DATA to a second fresh file and encode from an open input stream.
+ (let ((pathname (merge-pathnames (crypto:byte-array-to-hex-string (crypto:random-data 16))
+ ,tmpdir)))
+ (with-open-file (f pathname
+ :direction :output
+ :element-type 'serapeum:octet
+ :if-does-not-exist :create)
+ (write-sequence ,data f))
+ (read-capability-to-urn
+ (with-open-file (f pathname :direction :input
+ :element-type 'serapeum:octet)
+ (eris-encode f
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret)))))
+ (vector-encode
+ ;; Encode DATA directly.
+ (read-capability-to-urn
+ (eris-encode ,data
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret)))
+ (stream-encode
+ ;; Encode DATA through an in-memory octet input stream (not a file stream).
+ (read-capability-to-urn
+ (with-octet-input-stream (stream ,data)
+ (eris-encode stream
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret)))))
+ (is (serapeum:equalp* vector-encode stream-encode pathname-encode file-stream-encode))))
+
+;; Run ENCODE-CONSENSUS-TEST on fixed sizes around the 1024 and 32kib block
+;; boundaries, then on generated buffers of length 1 to 70000 with both block
+;; sizes.
+(test encoding-consensus-tests
+ (with-temporary-dir tdir
+ (encode-consensus-test tdir (make-octets 1 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 512 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 1023 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 1024 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 16383 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 16384 :element 2) 1024)
+ (encode-consensus-test tdir (make-octets 1024 :element 2) 32kib)
+ (encode-consensus-test tdir (make-octets 32767 :element 2) 32kib)
+ (encode-consensus-test tdir (make-octets 32768 :element 2) 32kib)
+ (encode-consensus-test tdir (make-octets 64000 :element 2) 32kib)
+ (for-all ((buffer (gen-buffer :length (gen-integer :min 1 :max 70000))))
+ (encode-consensus-test tdir buffer 1024)
+ (encode-consensus-test tdir buffer 32kib))))
+
+
+;; Empty input: all encoding methods must agree for both block sizes.
+(test encoding-nothing
+ (with-temporary-dir tdir
+ (encode-consensus-test tdir (make-octets 0) 1024)
+ (encode-consensus-test tdir (make-octets 0) 32kib)))
+
+
+(defmacro encoding-file-pos (tmpdir data pos block-size &key (secret null-secret))
+ "Test that encoding a file stream positioned at POS gives the same URN as
+encoding (SUBSEQ DATA POS) as a vector, using BLOCK-SIZE and SECRET.
+
+DATA is written to a randomly named file under TMPDIR. The DATA form is spliced
+into the expansion twice, so it is evaluated more than once."
+ `(let ((vector-encode (read-capability-to-urn
+ (eris-encode (subseq ,data ,pos)
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret)))
+ (file-stream-encode
+ (let ((pathname (merge-pathnames (crypto:byte-array-to-hex-string (crypto:random-data 16))
+ ,tmpdir)))
+ (with-open-file (f pathname
+ :direction :output
+ :element-type 'serapeum:octet
+ :if-does-not-exist :create)
+ (write-sequence ,data f))
+ (read-capability-to-urn
+ (with-open-file (f pathname :direction :input
+ :element-type 'serapeum:octet)
+ ;; Skip the first POS octets before encoding.
+ (file-position f ,pos)
+ (eris-encode f
+ ,block-size
+ (lambda (block ref) (declare (ignore ref)) block)
+ :secret ,secret))))))
+ (is (equalp vector-encode file-stream-encode))))
+
+;; Encode file streams from a non-zero file position, including positions one
+;; octet after the start and one octet before the end of the data.
+(test encoding-file-position-tests
+ (with-temporary-dir tdir
+ (encoding-file-pos tdir (make-octets 1024 :element 2) 512 1024)
+ (encoding-file-pos tdir (make-octets 1024 :element 2) 1023 1024)
+ (encoding-file-pos tdir (make-octets 1024 :element 2) 1 1024)
+ (encoding-file-pos tdir (make-octets 32000 :element 2) 1673 32kib)
+ (encoding-file-pos tdir (make-octets 32000 :element 2) 31999 32kib)))