From de23bd8fa5db84424faed6326c9a57c35c20a955 Mon Sep 17 00:00:00 2001 From: Ekaitz Zarraga Date: Tue, 6 Feb 2024 22:24:03 +0100 Subject: par: piece-table: Implement sentinel pieces Now the pieces are a circular doubly linked list with a sentinel piece --- par/piece-table.scm | 277 +++++++++++++++++++++++++++++--------------------- par/piece-table.sld | 4 +- tests/piece-table.scm | 13 +++ 3 files changed, 178 insertions(+), 116 deletions(-) diff --git a/par/piece-table.scm b/par/piece-table.scm index 65e733f..54cb9a9 100644 --- a/par/piece-table.scm +++ b/par/piece-table.scm @@ -19,8 +19,37 @@ (length piece-length set-piece-length!) (type piece-type set-piece-type!) (linebreaks piece-linebreaks set-piece-linebreaks!) - (next piece-next set-piece-next!) - (prev piece-prev set-piece-prev!)) + (next %piece-next %set-piece-next!) + (prev %piece-prev %set-piece-prev!)) + +(define-record-type + (make-sentinel-piece next prev) + sentinel-piece? + (next %sentinel-piece-next %set-sentinel-piece-next!) + (prev %sentinel-piece-prev %set-sentinel-piece-prev!)) + +;; TODO think about a way to make all this polymorphism automagic +(define (piece-next p) + (cond + ((sentinel-piece? p) (%sentinel-piece-next p)) + ((piece? p) (%piece-next p)))) + +(define (piece-prev p) + (cond + ((sentinel-piece? p) (%sentinel-piece-prev p)) + ((piece? p) (%piece-prev p)))) + +(define (set-piece-prev! p prev) + (cond + ((sentinel-piece? p) (%set-sentinel-piece-prev! p prev)) + ((piece? p) (%set-piece-prev! p prev)))) + +(define (set-piece-next! p next) + (cond + ((sentinel-piece? p) (%set-sentinel-piece-next! p next)) + ((piece? p) (%set-piece-next! p next)))) +;; + (define (index-linebreaks buffer start length) (define str (buffer->string buffer)) @@ -38,13 +67,9 @@ start length) (list->vector (reverse outlist))) -(define (make-piece buffer start length type prev next) - (%make-piece buffer start length type (index-linebreaks buffer start length) - prev next)) - (define (can-merge-pieces? a b) ;; Maybe simplify the and/or/not magic? - (and (not (or (null? a) (null? b))) + (and (not (or (sentinel-piece? a) (sentinel-piece? b))) (or (or (= 0 (piece-length a)) (= 0 (piece-length b))) (and (eq? (piece-buffer a) (piece-buffer b)) @@ -58,14 +83,7 @@ (set-piece-linebreaks! a (vector-append (piece-linebreaks a) (piece-linebreaks b))) (set-piece-next! a (piece-next b)) - (unless (null? (piece-next b)) (set-piece-prev! (piece-next b) a))) - -(define (connect-pieces! a b) - (if (can-merge-pieces? a b) - (merge-pieces! a b) - (begin - (set-piece-next! a b) - (set-piece-prev! b a)))) + (unless (sentinel-piece? (piece-next b)) (set-piece-prev! (piece-next b) a))) (define (split-linebreaks linebreaks at) (let* ((lb-l (vector-length linebreaks)) @@ -78,20 +96,34 @@ (vector-map (lambda (x) (- x at)) (vector-copy linebreaks lb-at lb-l))))) +;; Piece API +(define (make-piece buffer start length type prev next) + (%make-piece buffer start length type (index-linebreaks buffer start length) + prev next)) + +(define (connect-pieces! a b) + (begin + (set-piece-next! a b) + (set-piece-prev! b a))) + (define (split-piece! a at) - (let-values (((lb-a lb-b) (split-linebreaks (piece-linebreaks a) at))) - (letrec* ((b (%make-piece (piece-buffer a) - (+ at (piece-start a)) - (- (piece-length a) at) - (piece-type a) - lb-b - (piece-next a) - a))) - (unless (null? (piece-next a)) (set-piece-prev! (piece-next a) b)) - (set-piece-length! a at) - (set-piece-linebreaks! a lb-a) - (set-piece-next! a b) - (values a b)))) + (if (and (= at 0) (sentinel-piece? a)) + (values a a) ;; Just return the sentinel twice, as this is only used to + ;; take one of them + (let-values (((lb-a lb-b) (split-linebreaks (piece-linebreaks a) at))) + (letrec* ((b (%make-piece (piece-buffer a) + (+ at (piece-start a)) + (- (piece-length a) at) + (piece-type a) + lb-b + (piece-next a) + a))) + (unless (sentinel-piece? (piece-next a)) + (set-piece-prev! (piece-next a) b)) + (set-piece-length! a at) + (set-piece-linebreaks! a lb-a) + (set-piece-next! a b) + (values a b))))) @@ -114,11 +146,13 @@ (string add-buffer-string set-add-buffer-string!) (used add-buffer-used set-add-buffer-used!)) +;; TODO think about a way to make all this polymorphism automagic (define (buffer->string buffer) "Returns the underlying string of the buffer" (cond ((add-buffer? buffer) (add-buffer-string buffer)) - ((ro-buffer? buffer) (ro-buffer-string buffer)))) + ((ro-buffer? buffer) (ro-buffer-string buffer)))) +;; (define add-buffer-length (make-parameter 100)) @@ -134,24 +168,14 @@ (string-copy! new 0 str))) (define (add-buffer-append! add-buffer str) - "Appends to add buffer, growing if necessary and returns the resulting piece - as a template function" + "Appends to add buffer, growing if necessary" (let ((append-len (string-length str)) (buffer-used (add-buffer-used add-buffer)) (buffer-size (string-length (add-buffer-string add-buffer)))) (when (>= (+ append-len buffer-used) buffer-size) (enlarge-add-buffer! add-buffer append-len)) (string-copy! (add-buffer-string add-buffer) buffer-used str) - (set-add-buffer-used! add-buffer (+ append-len buffer-used)) - - ;; TODO: make this another way? maybe it's fine to do it like this... - (lambda (type) - (make-piece add-buffer - buffer-used - (string-length str) - type - (list) - (list))))) + (set-add-buffer-used! add-buffer (+ append-len buffer-used)))) @@ -167,26 +191,43 @@ (define (make-piece-table original) - (let ((ro-buffer (make-ro-buffer original))) + (letrec* ((ro-buffer (make-ro-buffer original)) + (piece (make-piece ro-buffer 0 (string-length original) 'normal + sentinel sentinel)) + (sentinel (make-sentinel-piece piece piece))) + (set-piece-prev! piece sentinel) + (set-piece-next! piece sentinel) (%make-piece-table - ro-buffer (make-add-buffer) - (make-piece ro-buffer 0 (string-length original) 'normal (list) (list))))) + ro-buffer (make-add-buffer) piece))) + +(define (piece-table-index->piece+index piece-table pos) + "Returns (values piece remainder) or eof-object" + (let loop ((piece (piece-table-piece piece-table)) + (rem pos)) + (cond + ((sentinel-piece? piece) + (if (= rem 0) + (values piece rem) + (error "Requested position out of piece-table"))) + ((< rem (piece-length piece)) + (values piece rem)) + (else + (loop (piece-next piece) (- rem (piece-length piece))))))) + + + +;; API (define (piece-table-index piece-table pos) ;; TODO: Validate input - (let loop ((p (piece-table-piece piece-table)) - (start 0)) - (let* ((end (piece-length p))) - (if (<= start pos (+ start end -1)) - (string-ref (buffer->string (piece-buffer p)) (- pos start)) - (if (not (null? (piece-next p))) - (loop (piece-next p) (+ start end)) - (eof-object)))))) + (let-values (((piece idx) (piece-table-index->piece+index piece-table pos))) + (string-ref (buffer->string (piece-buffer piece)) + (+ (piece-start piece) idx)))) (define (piece-table-text-length piece-table) (let loop ((piece (piece-table-piece piece-table)) (acc 0)) - (if (null? (piece-next piece)) + (if (sentinel-piece? (piece-next piece)) (+ acc (piece-length piece)) (loop (piece-next piece) (+ (piece-length piece) acc))))) @@ -202,7 +243,7 @@ (piece (piece-table-piece piece-table))) (let* ((len (piece-length piece)) (end (+ start len)) - (more? (not (null? (piece-next piece))))) + (more? (not (sentinel-piece? (piece-next piece))))) (when (< idx to) ;; Skip tail (if (or (= idx end) (<= end from)) ;; Current piece doesn't contain the section we need or finished @@ -220,44 +261,12 @@ ;; There are chars to process in this piece (loop (+ idx 1) start piece))))))) -(define (piece-table-find-line-break piece-table idx) - (let loop ((idx 0) - (piece (piece-table-piece piece-table)) - (rem idx)) - (let* ((len (piece-length piece)) - (lines (piece-linebreaks piece)) - (lc (vector-length lines))) - (cond - ((< rem lc) - (+ idx (vector-ref lines rem))) - ;; TODO: No more pieces, return the end or eof? - ((null? (piece-next piece)) - (eof-object)) - (else - (loop (+ idx len) (piece-next piece) (- rem lc))))))) - -(define (piece-table-index->piece+index piece-table pos) - "Returns (values piece remainder) or eof-object" - (let loop ((piece (piece-table-piece piece-table)) - (rem pos)) - (let ((len (piece-length piece))) - (cond - ((<= rem len) - (values piece rem)) - ((null? (piece-next piece)) - ;; TODO: No more pieces, return the end or eof? - (error "Requested position out of piece-table")) - (else - (loop (piece-next piece) (- rem len))))))) +(define (piece-table-for-each-right piece-table f from to) + "-right version of piece-table-for-each, it finds the tail and goes + backwards" + ;; TODO: not implemented yet + ) -(define (piece-table-piece-index piece-table i) - (let ((piece-prev/next (if (< i 0) piece-prev piece-next)) - (inc/dec (if (< i 0) - +))) - (let loop ((piece (piece-table-piece piece-table)) - (idx 0)) - (if (= idx i) - piece - (loop (piece-prev/next piece) (inc/dec idx 1)))))) (define (piece-table-insert! piece-table pos str type) ;; TODO: validate input @@ -265,35 +274,72 @@ ;; there? Is this maximum evilness? => it's just a string-ref and it prevents ;; us from creating pieces! - ;; TODO Maybe remove this? - (define candidate ((add-buffer-append! - (piece-table-add-buffer piece-table) str) - type)) - - (unless (= 0 (string-length str)) - (let*-values (((piece idx) (piece-table-index->piece+index piece-table pos))) - (if (and (= idx (piece-length piece)) - (can-merge-pieces? piece candidate)) - (merge-pieces! piece candidate) - (let-values (((first second) (split-piece! piece idx))) - (connect-pieces! first candidate) - (connect-pieces! candidate second)))))) + (let* ((add-buffer (piece-table-add-buffer piece-table)) + (start (add-buffer-used add-buffer)) + (_ (add-buffer-append! add-buffer str)) + (candidate (make-piece add-buffer + start + (string-length str) + type + #f #f))) + (unless (= 0 (string-length str)) + (let*-values (((piece idx) + (piece-table-index->piece+index piece-table pos))) + (cond + ((= idx 0) + (connect-pieces! (piece-prev piece) candidate) + (connect-pieces! candidate piece) + (when (can-merge-pieces? (piece-prev piece) candidate) + (merge-pieces! (piece-prev piece) candidate))) + (else + (let-values (((first second) (split-piece! piece idx))) + (connect-pieces! first candidate) + (connect-pieces! candidate second) + (when (can-merge-pieces? first candidate) + (merge-pieces! first candidate))))))))) (define (piece-table-delete! piece-table from len) - ;; TODO: Validate input (unless (= 0 len) (let*-values (((start-piece idx) (piece-table-index->piece+index piece-table from)) ((first second) (split-piece! start-piece idx)) - ;; We can search from the `second` here, because it's going - ;; to be at least in second, and we only iterate once that - ;; way ((end-piece end-idx) (piece-table-index->piece+index piece-table (+ from len))) ((third fourth) (split-piece! end-piece end-idx))) (connect-pieces! first fourth)))) +;; Finding: +;; NOTE: Line breaks are optimized. + +(define (piece-table-find-line-break piece-table idx) + (let loop ((idx 0) + (piece (piece-table-piece piece-table)) + (rem idx)) + (let* ((len (piece-length piece)) + (lines (piece-linebreaks piece)) + (lc (vector-length lines))) + (cond + ((< rem lc) + (+ idx (vector-ref lines rem))) + ;; TODO: No more pieces, return the end or eof? + ((sentinel-piece? (piece-next piece)) + (eof-object)) + (else + (loop (+ idx len) (piece-next piece) (- rem lc))))))) + +(define (piece-table-find piece-table char from) + ;; TODO: generalize the from + (call/cc (lambda (cont) + (piece-table-for-each + piece-table + (lambda (c i p) + (when (char=? c char) + (cont i))) + from + (piece-table-text-length piece-table)) + (cont (eof-object))))) + ;; Serialization - Deserialization @@ -313,15 +359,16 @@ (let ((out-string (make-string (- to from)))) (let loop ((piece (piece-table-piece piece-table)) (acc 0)) - (if (or (null? piece) (>= acc to)) + (if (or (sentinel-piece? piece) (>= acc to)) out-string (begin - (string-copy! - out-string - (max (- acc from) 0) - (buffer->string (piece-buffer piece)) - (+ (piece-start piece) (max (- from acc) 0)) - (+ (piece-start piece) (min (- to acc) (piece-length piece)))) + (when (>= (+ (piece-length piece) acc) from) + (string-copy! + out-string + (max (- acc from) 0) + (buffer->string (piece-buffer piece)) + (+ (piece-start piece) (max (- from acc) 0)) + (+ (piece-start piece) (min (- to acc) (piece-length piece))))) (loop (piece-next piece) (+ acc (piece-length piece)))))))) (define (piece-table->string piece-table) diff --git a/par/piece-table.sld b/par/piece-table.sld index c197181..7209baf 100644 --- a/par/piece-table.sld +++ b/par/piece-table.sld @@ -3,6 +3,7 @@ (srfi 11)) (export make-piece-table piece-table-index + piece-table-text-length piece-table-insert! piece-table-delete! piece-table->string @@ -10,5 +11,6 @@ string->piece-table add-buffer-length piece-table-line - piece-table-for-each) + piece-table-for-each + piece-table-find) (include "piece-table.scm")) diff --git a/tests/piece-table.scm b/tests/piece-table.scm index 76176a9..62ff11d 100644 --- a/tests/piece-table.scm +++ b/tests/piece-table.scm @@ -7,9 +7,17 @@ (test-begin "index") (define table (make-piece-table "HOLA")) + (test-equal #\H (piece-table-index table 0)) + (test-equal #\O (piece-table-index table 1)) (test-equal #\L (piece-table-index table 2)) + (test-equal #\A (piece-table-index table 3)) (test-end "index") +(test-begin "length") + (define table (make-piece-table "HOLA")) + (test-equal 4 (piece-table-text-length table)) +(test-end "length") + (test-begin "insert") (define table (make-piece-table "HOLA")) (piece-table-insert! table 4 "9" 'normal) @@ -61,3 +69,8 @@ 0 10) (test-end "for-each") + +(test-begin "find") + (define table (make-piece-table "1234567890")) + (test-equal 8 (piece-table-find table #\9 0)) +(test-end "find") -- cgit v1.2.3