diff --git a/crates/vectord/src/chunker.rs b/crates/vectord/src/chunker.rs
index aed1a62..cba2ab4 100644
--- a/crates/vectord/src/chunker.rs
+++ b/crates/vectord/src/chunker.rs
@@ -15,9 +15,28 @@ pub struct TextChunk {
     pub text: String,
 }
 
+/// Round `idx` UP to the nearest UTF-8 char boundary in `s`, clamping any
+/// `idx` at or past the end to `s.len()`. Needed because `&str[a..b]` panics
+/// if `a` or `b` lands mid-character, and text imported from Postgres/user
+/// data can contain multi-byte chars like `\u{202f}` (narrow no-break space), dashes, etc.
+fn ceil_char_boundary(s: &str, mut idx: usize) -> usize {
+    if idx >= s.len() { return s.len(); } // clamp out-of-range indices
+    while idx <= s.len() && !s.is_char_boundary(idx) {
+        idx += 1; // step forward one byte until a boundary is reached
+    }
+    idx
+}
+
 /// Split text into overlapping chunks.
-/// - `chunk_size`: target characters per chunk (not tokens — chars are a good proxy)
-/// - `overlap`: characters of overlap between consecutive chunks
+/// - `chunk_size`: target bytes per chunk (UTF-8 boundary-respecting)
+/// - `overlap`: bytes of overlap between consecutive chunks
+///
+/// Safety contract:
+/// - Never slices at a byte that isn't a UTF-8 char boundary
+/// - Always advances forward (monotonic `start`) — no infinite loops on
+///   pathological inputs
+/// - Bounded iteration — caps total chunks at `text.len()` to prevent
+///   runaway allocation on degenerate text/parameter combinations
 pub fn chunk_text(
     text: &str,
     source: &str,
@@ -44,28 +63,44 @@ pub fn chunk_text(
     let mut start = 0;
     let mut idx = 0u32;
 
-    while start < text.len() {
-        let end = (start + chunk_size).min(text.len());
+    // Safety cap: `start` moves forward by at least one byte per iteration
+    // (the forced advance below guarantees it), so even in degenerate cases
+    // the loop can never run more iterations than there are bytes of text.
+    let max_chunks = text.len();
+    let mut iterations = 0;
 
-        // Try to break at a sentence or paragraph boundary
-        let chunk_text = &text[start..end];
+    while start < text.len() {
+        iterations += 1;
+        if iterations > max_chunks {
+            tracing::warn!(
+                "chunker: exceeded safety cap ({}) on doc '{}' of length {} — stopping",
+                max_chunks, doc_id, text.len(),
+            );
+            break;
+        }
+
+        // UTF-8 safe upper bound: move forward to a char boundary.
+        let end = ceil_char_boundary(text, (start + chunk_size).min(text.len()));
+
+        let chunk_text_slice = &text[start..end];
         let actual_end = if end < text.len() {
-            // Look for last sentence boundary in the chunk
-            if let Some(pos) = chunk_text.rfind(". ") {
+            // Look for last sentence boundary inside the slice.
+            if let Some(pos) = chunk_text_slice.rfind(". ") {
                 start + pos + 2
-            } else if let Some(pos) = chunk_text.rfind('\n') {
+            } else if let Some(pos) = chunk_text_slice.rfind('\n') {
+                start + pos + 1
+            } else if let Some(pos) = chunk_text_slice.rfind(' ') {
                 start + pos + 1
             } else {
-                // Fall back to word boundary
-                if let Some(pos) = chunk_text.rfind(' ') {
-                    start + pos + 1
-                } else {
-                    end
-                }
+                end
             }
         } else {
             end
         };
+        // All the rfind'd positions are at char boundaries (they match
+        // ASCII byte patterns), so actual_end should already be safe.
+        // Defensive belt-and-suspenders: round up anyway.
+        let actual_end = ceil_char_boundary(text, actual_end);
 
         let chunk = text[start..actual_end].trim();
         if !chunk.is_empty() {
@@ -78,11 +113,24 @@ pub fn chunk_text(
             idx += 1;
         }
 
-        // Advance with overlap
+        // Advance with overlap — must strictly progress to avoid infinite
+        // loops on degenerate inputs (e.g. chunk_size ≤ overlap, or
+        // boundary-finding returning a position at `start`).
         if actual_end >= text.len() {
             break;
         }
-        start = if actual_end > overlap { actual_end - overlap } else { actual_end };
+        let tentative = if actual_end > overlap { actual_end - overlap } else { actual_end };
+        let next_start = ceil_char_boundary(text, tentative);
+        if next_start <= start {
+            // No progress — force forward one char to guarantee termination.
+            let mut forced = start + 1;
+            while forced < text.len() && !text.is_char_boundary(forced) {
+                forced += 1;
+            }
+            start = forced;
+        } else {
+            start = next_start;
+        }
     }
 
     chunks
@@ -132,4 +180,45 @@ mod tests {
         assert_eq!(chunks[0].source, "candidates");
         assert_eq!(chunks[0].doc_id, "CAND-001");
     }
+
+    #[test]
+    fn handles_multibyte_utf8_at_chunk_boundary() {
+        // Regression: multi-byte punctuation (narrow no-break space \u{202f}
+        // and en dash \u{2013}, 3 bytes each in UTF-8) landing on a chunk-size
+        // cut used to panic with "byte index N is not a char boundary". Found
+        // on pg-imported data that contained editorial unicode punctuation.
+        let text = "**Claim 1**\u{202f}\u{2013} *Each large language model has its own personality \
+                   and way of interacting.*\u{202f}\u{2013}\u{202f}Verdict:\u{202f}VERIFIED. \
+                   Reasoning: widely accepted observation that different models exhibit distinct response \
+                   styles stemming from variations in training data.".repeat(5);
+        let chunks = chunk_text(&text, "test", "unicode-1", 500, 50);
+        // Must not panic, must produce at least one chunk.
+        assert!(!chunks.is_empty());
+        // Every chunk must be non-empty and appear verbatim in the original.
+        for chunk in &chunks {
+            assert!(!chunk.text.is_empty());
+            assert!(text.contains(&chunk.text));
+        }
+    }
+
+    #[test]
+    fn no_infinite_loop_on_no_spaces() {
+        // Long text with no sentence, newline, or space boundaries — the
+        // chunker must still advance by cutting at the hard `end` fallback.
+        let text = "a".repeat(5000);
+        let chunks = chunk_text(&text, "test", "noSpace", 500, 50);
+        assert!(!chunks.is_empty());
+        // Expected ~5000/450 ≈ 11 chunks (chunk_size=500, overlap=50 → step=450).
+        assert!(chunks.len() < 50, "chunk count {} seems excessive", chunks.len());
+    }
+
+    #[test]
+    fn no_infinite_loop_on_degenerate_params() {
+        // chunk_size equals overlap — stepping back by the overlap previously
+        // made zero forward progress. The forced one-char advance must catch it.
+        let text = "a b c d e f g h i j k l m n o p q r s t u v w x y z".repeat(20);
+        let chunks = chunk_text(&text, "test", "degen", 100, 100);
+        assert!(!chunks.is_empty());
+        assert!(chunks.len() <= text.len(), "runaway chunk count {}", chunks.len());
+    }
 }