#!/usr/bin/env stack -- stack --resolver lts-11.10 script {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NoImplicitPrelude #-} import Control.Monad (replicateM) import Data.Aeson import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import qualified Data.HashMap.Strict as HM import Data.Hashable (hash) import Data.List (intersperse) import qualified Data.Text as T import Data.Text.Encoding import RIO import System.Environment (getArgs) import System.Exit (die) import Prelude (print) main :: IO () main = do args <- getArgs arg <- case args of [] -> die "Supply the file with colliding strings as an argument" (a : _) -> pure a bs <- B.readFile arg let ls = filter (not . B.null) $ B.lines bs texts' <- fmap catMaybes $ forM ls $ \l' -> do let l = if even (B.length l') then l' else l' <> "\0" res <- tryAnyDeep $ return $ decodeUtf16LE (l :: B.ByteString) case res of Left e -> error $ show (l, e) Right x -> return $ Just x let texts = concatMap extend texts' let obj = Object $ HM.fromList $ map (\t -> (t, Number 0)) texts print $ length texts let hashes = map hash texts let randoms = take (2 * length texts) $ map T.pack $ replicateM 20 ['A' .. 'Z'] print $ HM.fromListWith (+) $ map (\h -> (h, 1)) hashes withBinaryFile "collide.json" WriteMode $ \h -> hPutBuilder h $ buildJSON texts withBinaryFile "no-collide.json" WriteMode $ \h -> hPutBuilder h $ buildJSON randoms extend :: T.Text -> [T.Text] extend text = map foo [0 .. 3] where foo len = T.append text $ T.replicate len "\0" buildJSON :: [T.Text] -> Builder buildJSON ts = "{" <> fold (intersperse "," (map toPair ts)) <> "}" toPair :: T.Text -> Builder toPair text = fromEncoding (toEncoding text) <> ":0"