diff --git a/README.md b/README.md
index 0f11bd35..a866454f 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 This is the PyTorch implementation of the [RotatE](https://openreview.net/forum?id=HkgEQnRqYQ) model for knowledge graph embedding (KGE). We provide a toolkit that gives state-of-the-art performance of several popular KGE models. The toolkit is quite efficient, which is able to train a large KGE model within a few hours on a single GPU.
 
+A faster multi-GPU implementation of RotatE and other KGE models is available in [GraphVite](https://github.com/DeepGraphLearning/graphvite).
+
 **Implemented features**
 
 Models:
 
diff --git a/best_config.sh b/best_config.sh
index f509de3a..5dad806b 100755
--- a/best_config.sh
+++ b/best_config.sh
@@ -41,11 +41,11 @@ bash run.sh train ComplEx countries_S3 0 0 512 64 1000 1.0 1.0 0.000002 40000 8
 #
 # Best Configuration for DistMult
 #
-bash run.sh train DistMult FB15k 0 0 1024 256 1000 500.0 1.0 0.001 150000 16 -de -dr -r 0.000002
-bash run.sh train DistMult FB15k-237 0 0 1024 256 1000 200.0 1.0 0.001 100000 16 -de -dr -r 0.00001
-bash run.sh train DistMult wn18 0 0 512 1024 500 200.0 1.0 0.001 80000 8 -de -dr -r 0.00001
-bash run.sh train DistMult wn18rr 0 0 512 1024 500 200.0 1.0 0.002 80000 8 -de -dr -r 0.000005
-bash run.sh train DistMult countries_S1 0 0 512 64 1000 1.0 1.0 0.000002 40000 8 -de -dr -r 0.0005 --countries
-bash run.sh train DistMult countries_S2 0 0 512 64 1000 1.0 1.0 0.000002 40000 8 -de -dr -r 0.0005 --countries
-bash run.sh train DistMult countries_S3 0 0 512 64 1000 1.0 1.0 0.000002 40000 8 -de -dr -r 0.0005 --countries
-#
\ No newline at end of file
+bash run.sh train DistMult FB15k 0 0 1024 256 2000 500.0 1.0 0.001 150000 16 -r 0.000002
+bash run.sh train DistMult FB15k-237 0 0 1024 256 2000 200.0 1.0 0.001 100000 16 -r 0.00001
+bash run.sh train DistMult wn18 0 0 512 1024 1000 200.0 1.0 0.001 80000 8 -r 0.00001
+bash run.sh train DistMult wn18rr 0 0 512 1024 1000 200.0 1.0 0.002 80000 8 -r 0.000005
+bash run.sh train DistMult countries_S1 0 0 512 64 2000 1.0 1.0 0.000002 40000 8 -r 0.0005 --countries
+bash run.sh train DistMult countries_S2 0 0 512 64 2000 1.0 1.0 0.000002 40000 8 -r 0.0005 --countries
+bash run.sh train DistMult countries_S3 0 0 512 64 2000 1.0 1.0 0.000002 40000 8 -r 0.0005 --countries
+#
diff --git a/codes/dataloader.py b/codes/dataloader.py
index 70d43a25..ed3f3492 100644
--- a/codes/dataloader.py
+++ b/codes/dataloader.py
@@ -59,8 +59,8 @@ def __getitem__(self, idx):
 
         negative_sample = np.concatenate(negative_sample_list)[:self.negative_sample_size]
 
-        negative_sample = torch.from_numpy(negative_sample)
-
+        negative_sample = torch.LongTensor(negative_sample)
+
         positive_sample = torch.LongTensor(positive_sample)
 
         return positive_sample, negative_sample, subsampling_weight, self.mode
@@ -181,4 +181,4 @@ def one_shot_iterator(dataloader):
         '''
         while True:
             for data in dataloader:
-                yield data
\ No newline at end of file
+                yield data
diff --git a/codes/model.py b/codes/model.py
index 30762313..2459e71a 100644
--- a/codes/model.py
+++ b/codes/model.py
@@ -75,7 +75,7 @@ def forward(self, sample, mode='single'):
         In the 'head-batch' or 'tail-batch' mode, sample consists two part.
         The first part is usually the positive sample.
         And the second part is the entities in the negative samples.
-        Becuase negative samples and positive samples usually share two elements
+        Because negative samples and positive samples usually share two elements
         in their triple ((head, relation) or (relation, tail)).
         '''
 
@@ -267,7 +267,7 @@ def train_step(model, optimizer, train_iterator, args):
         negative_score = model((positive_sample, negative_sample), mode=mode)
 
         if args.negative_adversarial_sampling:
-            #In self-negative sampling, we do not apply back-propagation on the sampling weight
+            #In self-adversarial sampling, we do not apply back-propagation on the sampling weight
             negative_score = (F.softmax(negative_score * args.adversarial_temperature, dim = 1).detach()
                               * F.logsigmoid(-negative_score)).sum(dim = 1)
         else:
@@ -278,8 +278,8 @@ def train_step(model, optimizer, train_iterator, args):
         positive_score = F.logsigmoid(positive_score).squeeze(dim = 1)
 
         if args.uni_weight:
-            positive_sample_loss = positive_score.mean()
-            negative_sample_loss = negative_score.mean()
+            positive_sample_loss = - positive_score.mean()
+            negative_sample_loss = - negative_score.mean()
         else:
             positive_sample_loss = - (subsampling_weight * positive_score).sum()/subsampling_weight.sum()
             negative_sample_loss = - (subsampling_weight * negative_score).sum()/subsampling_weight.sum()
diff --git a/codes/run.py b/codes/run.py
index 9cc7d2e9..457c6fdf 100644
--- a/codes/run.py
+++ b/codes/run.py
@@ -284,7 +284,6 @@ def main(args):
 
     logging.info('Start Training...')
    logging.info('init_step = %d' % init_step)
-    logging.info('learning_rate = %d' % current_learning_rate)
     logging.info('batch_size = %d' % args.batch_size)
     logging.info('negative_adversarial_sampling = %d' % args.negative_adversarial_sampling)
     logging.info('hidden_dim = %d' % args.hidden_dim)
@@ -296,6 +295,8 @@ def main(args):
     # Set valid dataloader as it would be evaluated during training
 
     if args.do_train:
+        logging.info('learning_rate = %d' % current_learning_rate)
+
         training_logs = []
 
         #Training Loop
@@ -357,4 +358,4 @@ def main(args):
         log_metrics('Test', step, metrics)
 
 if __name__ == '__main__':
-    main(parse_args())
\ No newline at end of file
+    main(parse_args())
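Note on the `uni_weight` sign fix and the self-adversarial comment in the `codes/model.py` hunks above: `F.logsigmoid` always returns values <= 0, so both loss terms need a leading minus to give a non-negative quantity that gradient descent can minimize; the non-uniform (`subsampling_weight`) branch already had it. Below is a minimal standalone sketch, not part of the diff, of how the loss reads after this change; the function name, tensor shapes, and temperature value are illustrative assumptions.

```python
import torch
import torch.nn.functional as F

def self_adversarial_loss(positive_score, negative_score, adversarial_temperature=1.0):
    # Self-adversarial weighting: harder negatives get larger weights,
    # detached so no gradient flows through the sampling weights.
    weights = F.softmax(negative_score * adversarial_temperature, dim=1).detach()
    negative_score = (weights * F.logsigmoid(-negative_score)).sum(dim=1)
    positive_score = F.logsigmoid(positive_score)

    # uni_weight branch after the fix: the leading minus turns log-likelihoods
    # (always <= 0) into a non-negative loss to minimize.
    positive_sample_loss = -positive_score.mean()
    negative_sample_loss = -negative_score.mean()

    # Average of the two terms, as train_step does.
    return (positive_sample_loss + negative_sample_loss) / 2

# Example: 16 positive triples, 128 negatives per triple (assumed shapes).
pos = torch.randn(16)
neg = torch.randn(16, 128)
print(self_adversarial_loss(pos, neg).item())
```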