Doc2Vec的主要目的是将任意文档与标签关联,因此需要标签。Doc2Vec是Word2Vec的一个扩展,它学习关联标签和单词,而不是用单词关联单词。DL4J实现它的意图是为了服务于Java、Scala和Culjule社区。
.labels(Arrays.asList("negative", "neutral","positive"))
public void testDifferentLabels() throws Exception {
ClassPathResource resource = new ClassPathResource("/labeled");
File file = resource.getFile();
LabelAwareSentenceIterator iter = LabelAwareUimaSentenceIterator.createWithPath(file.getAbsolutePath());
TokenizerFactory t = new UimaTokenizerFactory();
ParagraphVectors vec = new ParagraphVectors.Builder()
.minWordFrequency(1).labels(Arrays.asList("negative", "neutral","positive"))
.layerSize(100)
.stopWords(new ArrayList<String>())
.windowSize(5).iterate(iter).tokenizerFactory(t).build();
vec.fit();
assertNotEquals(vec.lookupTable().vector("UNK"), vec.lookupTable().vector("negative"));
assertNotEquals(vec.lookupTable().vector("UNK"),vec.lookupTable().vector("positive"));
assertNotEquals(vec.lookupTable().vector("UNK"),vec.lookupTable().vector("neutral"));}