Fix model loading: honor .size file for all formats, fix kld call
Change-Id: I6c38d1e7983d3fc4adcf51255ce50db0ffc5f3db
diff --git a/src/DerekoVecs.jl b/src/DerekoVecs.jl
index 7b66d09..f53b99c 100644
--- a/src/DerekoVecs.jl
+++ b/src/DerekoVecs.jl
@@ -34,25 +34,25 @@
if (occursin(r".vecs$", modelfn))
(n, d) = map(s -> parse(Int, s), split(readline(modelfn), " "))
vocabfn = replace(modelfn, ".vecs" => ".vocab")
- sizefn = replace(modelfn, ".vecs" => ".size")
file = readdlm(vocabfn, ' ', String, dims=(n, 2), quotes=false)
- vocab = file[:, 1]
- total = if (isfile(sizefn)) # .size-file with corrected token count?
- open(sizefn) do io
- readline(io)
- parse(Int, readline(io))
- end
- else
- sum(map(x -> parse(Int64, x), file[:, 2]))
- end
else
delim = ('\t' in readline(modelfn) ? '\t' : ' ')
file = readdlm(modelfn, delim, String, quotes=false)
- vocab = file[:, 1]
- n = length(vocab)
- total = sum(map(x -> parse(Int64, x), file[:, 2]))
end
+ vocab = file[:, 1]
+ n = length(vocab)
+
+ sizefn = replace(modelfn, r"\.[^.]+" => s".size")
+ total = if (isfile(sizefn)) # .size-file with corrected token count?
+ open(sizefn) do io
+ readline(io)
+ parse(Int, readline(io))
+ end
+ else
+ sum(map(x -> parse(Int64, x), file[:, 2]))
+ end
+
freqs = map(x -> parse(Float64, x) / total, file[:, 2])
vocabdict = Dict{String,Int64}(zip(vocab, 1:n))
vecsfn = "$(modelfn).vecs"
@@ -154,7 +154,7 @@
kld(dictp, [target.total_tokens, bg.total_tokens])
end
-kld(targetfn::String, bgfn::String)::kldResult = kld(load(targetfn, load(bgfn)))
+kld(targetfn::String, bgfn::String)::kldResult = kld(load(targetfn), load(bgfn))
end