From 25aa89fbdf11595710c725d290ae02244b4716fb Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Thu, 11 Feb 2016 13:31:24 +0100 Subject: [PATCH 01/81] aded lamabadge --- README.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fdb23a9..74971a5 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,13 @@ +.. image:: http://applejack.science.ru.nl/lamabadge.php/python-timbl + :target: http://applejack.science.ru.nl/languagemachines/ + ====================== README: python-timbl ====================== :Authors: Sander Canisius, Maarten van Gompel :Contact: proycon@anaproy.nl -:Web site: http://github.com/proycon/python-timbl/ +:Web site: https://github.com/proycon/python-timbl/ python-timbl is a Python extension module wrapping the full TiMBL C++ programming interface. With this module, all functionality exposed @@ -27,7 +30,7 @@ Installation ============ python-timbl is distributed as part of **LaMachine** -(https://github.com/proycon/lamachine), which significantly simplifies +(https://proycon.github.io/LaMachine), which significantly simplifies compilation and installation. The remainder of the instructions in this section refer to manual compilation and installation. 
From 09ce236405a08fad34697c77b0939bfd1d7f1dd6 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Sun, 13 Mar 2016 22:42:58 +0100 Subject: [PATCH 02/81] Changed pthread_mutex_init macro to function call (hoping Mac OS X won't choke on it like it did on the macro) --- src/timblapi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/timblapi.h b/src/timblapi.h index cfdf824..546877c 100644 --- a/src/timblapi.h +++ b/src/timblapi.h @@ -69,7 +69,12 @@ class TimblApiWrapper : public Timbl::TimblAPI { bool debug; int runningthreads; public: - TimblApiWrapper(const std::string& args, const std::string& name="") : Timbl::TimblAPI(args, name) { detachedexp = NULL; debug = false; runningthreads = 0; lock = PTHREAD_MUTEX_INITIALIZER;} + TimblApiWrapper(const std::string& args, const std::string& name="") : Timbl::TimblAPI(args, name) { + detachedexp = NULL; + debug = false; + runningthreads = 0; + pthread_mutex_init(&lock, NULL); + } ~TimblApiWrapper() { if (debug) std::cerr << "TimblApiWrapper Destructor" << std::endl; if (runningthreads == 0) { From 668a42d5ee170dcb559f416cdf64d029f5be498e Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Sun, 13 Mar 2016 22:48:26 +0100 Subject: [PATCH 03/81] version bump after Mac OS X fix --- setup2.py | 2 +- setup3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup2.py b/setup2.py index 2065094..c1821f9 100755 --- a/setup2.py +++ b/setup2.py @@ -126,7 +126,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2015.09.05", + version="2016.03.13", description="Python language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index e9664f8..3943489 100755 --- a/setup3.py +++ b/setup3.py @@ -124,7 +124,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2015.09.05", + version="2016.03.13", 
description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 66c8d98a81e5df69db68ab542791011f05baa1bc Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Thu, 2 Jun 2016 10:02:22 +0200 Subject: [PATCH 04/81] make_tuple() was suddenly ambiguous (std:: namespace instead of boost::python), made explicit now, should solve issue proycon/LaMachine#14 --- setup2.py | 2 +- setup3.py | 4 ++-- src/timblapi.cc | 16 ++++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/setup2.py b/setup2.py index c1821f9..8c01dd7 100755 --- a/setup2.py +++ b/setup2.py @@ -126,7 +126,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2016.03.13", + version="2016.06.02", description="Python language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 3943489..7603b00 100755 --- a/setup3.py +++ b/setup3.py @@ -100,7 +100,7 @@ def build_extensions(self): #probably goes wrong if this is for python 2! boostlib = "boost_python" elif os.path.exists(self.boost_library_dir + "/libboost_python3.dylib"): #Mac OS X - boostlib = "boost_python3" + boostlib = "boost_python3" elif os.path.exists(self.boost_library_dir + "/libboost_python.dylib"): #Mac OS X #probably goes wrong if this is for python 2! 
boostlib = "boost_python" @@ -124,7 +124,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2016.03.13", + version="2016.06.02", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/src/timblapi.cc b/src/timblapi.cc index 4f55c35..1f7370f 100755 --- a/src/timblapi.cc +++ b/src/timblapi.cc @@ -69,7 +69,7 @@ tuple TimblApiWrapper::classify(const std::string& line) { std::string cls; bool result = Classify(line, cls); - return make_tuple(result, cls); + return boost::python::make_tuple(result, cls); } @@ -78,7 +78,7 @@ tuple TimblApiWrapper::classify2(const std::string& line) std::string cls; double distance; bool result = Classify(line, cls, distance); - return make_tuple(result, cls, distance); + return boost::python::make_tuple(result, cls, distance); } @@ -90,13 +90,13 @@ tuple TimblApiWrapper::classify3(const std::string& line, bool normalize, const const Timbl::TargetValue * result = Classify(line, distrib , distance); if (result != NULL) { if ((requireddepth > 0) && (matchDepth() < requireddepth)) { - return make_tuple(true, "", python::dict(), 999999); + return boost::python::make_tuple(true, "", python::dict(), 999999); } else { const std::string cls = result->Name(); - return make_tuple(true, cls, dist2dict(distrib, normalize), distance); + return boost::python::make_tuple(true, cls, dist2dict(distrib, normalize), distance); } } else { - return make_tuple(false,"",python::dict(),999999); + return boost::python::make_tuple(false,"",python::dict(),999999); } } @@ -148,20 +148,20 @@ tuple TimblApiWrapper::classify3safe(const std::string& line, bool normalize,con PyEval_RestoreThread(m_thread_state); m_thread_state = NULL; runningthreads--; - return make_tuple(true, "", python::dict(), 999999); + return boost::python::make_tuple(true, "", python::dict(), 999999); } else { const std::string cls = 
result->Name(); //const std::string diststring = distrib->DistToString(); PyEval_RestoreThread(m_thread_state); m_thread_state = NULL; runningthreads--; - return make_tuple(true, cls, dist2dict(distrib, normalize), distance); + return boost::python::make_tuple(true, cls, dist2dict(distrib, normalize), distance); } } else { PyEval_RestoreThread(m_thread_state); m_thread_state = NULL; runningthreads--; - return make_tuple(false,"",python::dict(),999999); + return boost::python::make_tuple(false,"",python::dict(),999999); } } From c8e1c136402c5a2f055141185f6bd1caff51196b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 3 Apr 2017 22:00:24 +0200 Subject: [PATCH 05/81] removed startServer method (was removed in timbl itself and not really used here anyway) --- src/docstrings.h.in | 19 ++----------------- src/timblapi.cc | 37 ++++++++++++++++++------------------- src/timblapi.h | 20 ++++++++++---------- 3 files changed, 30 insertions(+), 46 deletions(-) diff --git a/src/docstrings.h.in b/src/docstrings.h.in index db78212..87b9ef6 100755 --- a/src/docstrings.h.in +++ b/src/docstrings.h.in @@ -287,8 +287,8 @@ of the nearest neighbour for a given test instance. must match, zero (the default) corresponds to a top level distribution, higher values will result in no distribution being returned if the required depth is not reached, this improves performance. - - + + :return: (boolean signalling success or failure, the predicted class, class distribution, distance of the nearest neighbour) @@ -463,21 +463,6 @@ implementation """ -STARTSERVER_DOC = """ -self.startServer(port, maxConnections) - -Start a TiMBL server. 
- -:Parameters: - `port` : int - the TCP port on which to listen for connections - - `maxConnections` : int - the maximum number of simultaneous connections - -:return: boolean signalling success or failure -:rtype: bool -""" CURRENTWEIGHTING_DOC = """ diff --git a/src/timblapi.cc b/src/timblapi.cc index 1f7370f..1925db7 100755 --- a/src/timblapi.cc +++ b/src/timblapi.cc @@ -2,27 +2,27 @@ * Copyright (C) 2006-2015 Sander Canisius, Maarten van Gompel * * This file is part of python-timbl. - * + * * python-timbl is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. - * + * * python-timbl is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with python-timbl; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA - * + * * Linking python-timbl statically or dynamically with other modules * is making a combined work based on python-timbl. Thus, the terms * and conditions of the GNU General Public License cover the whole * combination. - * + * * In addition, as a special exception, the copyright holder of * python-timbl gives you permission to combine python-timbl with free * software programs or libraries that are released under the GNU LGPL @@ -33,7 +33,7 @@ * code concerned, provided that you include the source code of that * other code when and as the GNU GPL requires distribution of source * code. 
- * + * * Note that people who make modified versions of python-timbl are not * obligated to grant this special exception for their modified * versions; it is their choice whether to do so. The GNU General @@ -66,7 +66,7 @@ using namespace boost::python; tuple TimblApiWrapper::classify(const std::string& line) -{ +{ std::string cls; bool result = Classify(line, cls); return boost::python::make_tuple(result, cls); @@ -86,7 +86,7 @@ tuple TimblApiWrapper::classify3(const std::string& line, bool normalize, const { std::string cls; double distance; - const Timbl::ValueDistribution * distrib; + const Timbl::ValueDistribution * distrib; const Timbl::TargetValue * result = Classify(line, distrib , distance); if (result != NULL) { if ((requireddepth > 0) && (matchDepth() < requireddepth)) { @@ -137,10 +137,10 @@ tuple TimblApiWrapper::classify3safe(const std::string& line, bool normalize,con { runningthreads++; PyThreadState * m_thread_state = PyEval_SaveThread(); //release GIL - + Timbl::TimblExperiment * clonedexp = getexperimentforthread(); - const Timbl::ValueDistribution * distrib; + const Timbl::ValueDistribution * distrib; double distance; const Timbl::TargetValue * result = clonedexp->Classify(line, distrib,distance); if (result != NULL) { @@ -159,7 +159,7 @@ tuple TimblApiWrapper::classify3safe(const std::string& line, bool normalize,con } } else { PyEval_RestoreThread(m_thread_state); - m_thread_state = NULL; + m_thread_state = NULL; runningthreads--; return boost::python::make_tuple(false,"",python::dict(),999999); } @@ -177,7 +177,7 @@ bool TimblApiWrapper::showBestNeighbours(object& stream) { #ifdef __clang__ std::cerr << "showBestNeighbours is not implemented for clang" << std::endl; - return false; + return false; #else int fd = extract(stream.attr("fileno")()); __gnu_cxx::stdio_filebuf fdbuf(dup(fd), std::ios::out); @@ -199,7 +199,7 @@ bool TimblApiWrapper::showOptions(object& stream) { #ifdef __clang__ std::cerr << "showOptions is not implemented for 
clang" << std::endl; - return false; + return false; #else int fd = extract(stream.attr("fileno")()); __gnu_cxx::stdio_filebuf fdbuf(dup(fd), std::ios::out); @@ -222,12 +222,12 @@ void TimblApiWrapper::initthreading() { detachedexp = grabAndDisconnectExp(); } - + bool TimblApiWrapper::showSettings(object& stream) { #ifdef __clang__ std::cerr << "showSettings is not implemented for clang" << std::endl; - return false; + return false; #else int fd = extract(stream.attr("fileno")()); __gnu_cxx::stdio_filebuf fdbuf(dup(fd), std::ios::out); @@ -242,7 +242,7 @@ python::dict TimblApiWrapper::dist2dict(const Timbl::ValueDistribution * distrib size_t freq; - double maxfreq = 0; + double maxfreq = 0; if (normalize) { Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); @@ -315,7 +315,7 @@ BOOST_PYTHON_MODULE(timblapi) .def("saveWeights", &TimblApiWrapper::SaveWeights, SAVEWEIGHTS_DOC) .def("getWeights", &TimblApiWrapper::GetWeights, GETWEIGHTS_DOC) - + .def("getAccuracy", &TimblApiWrapper::GetAccuracy, GETACCURACY_DOC) .def("writeArrays", &TimblApiWrapper::WriteArrays, WRITEARRAYS_DOC) @@ -346,7 +346,6 @@ BOOST_PYTHON_MODULE(timblapi) .def("expName", &TimblApiWrapper::ExpName, EXPNAME_DOC) .def("versionInfo", &TimblApiWrapper::VersionInfo, VERSIONINFO_DOC) .staticmethod("versionInfo") - .def("startServer", &TimblApiWrapper::StartServer, STARTSERVER_DOC) .def("currentWeighting", &TimblApiWrapper::CurrentWeighting, CURRENTWEIGHTING_DOC) .def("valid", &TimblApiWrapper::Valid) @@ -382,7 +381,7 @@ BOOST_PYTHON_MODULE(timblapi) .value("X2", Timbl::X2) .value("SV", Timbl::SV) ; - + //def("to_string", to_string); } diff --git a/src/timblapi.h b/src/timblapi.h index 546877c..6390a9b 100644 --- a/src/timblapi.h +++ b/src/timblapi.h @@ -2,27 +2,27 @@ * Copyright (C) 2006 Sander Canisius * * This file is part of python-timbl. 
- * + * * python-timbl is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. - * + * * python-timbl is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with python-timbl; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA - * + * * Linking python-timbl statically or dynamically with other modules * is making a combined work based on python-timbl. Thus, the terms * and conditions of the GNU General Public License cover the whole * combination. - * + * * In addition, as a special exception, the copyright holder of * python-timbl gives you permission to combine python-timbl with free * software programs or libraries that are released under the GNU LGPL @@ -33,7 +33,7 @@ * code concerned, provided that you include the source code of that * other code when and as the GNU GPL requires distribution of source * code. - * + * * Note that people who make modified versions of python-timbl are not * obligated to grant this special exception for their modified * versions; it is their choice whether to do so. 
The GNU General @@ -69,23 +69,23 @@ class TimblApiWrapper : public Timbl::TimblAPI { bool debug; int runningthreads; public: - TimblApiWrapper(const std::string& args, const std::string& name="") : Timbl::TimblAPI(args, name) { + TimblApiWrapper(const std::string& args, const std::string& name="") : Timbl::TimblAPI(args, name) { detachedexp = NULL; debug = false; runningthreads = 0; pthread_mutex_init(&lock, NULL); } - ~TimblApiWrapper() { + ~TimblApiWrapper() { if (debug) std::cerr << "TimblApiWrapper Destructor" << std::endl; if (runningthreads == 0) { - if (detachedexp != NULL) delete detachedexp; + if (detachedexp != NULL) delete detachedexp; for (std::vector >::iterator iter = experimentpool.begin(); iter != experimentpool.end(); iter++) { delete iter->second; } } } - + void initthreading(); void enableDebug() { debug = true; }; From 30da0ab097662e70849ebf39aff999c1b7fdc2c4 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 4 Apr 2017 15:32:46 +0200 Subject: [PATCH 06/81] update before 2017.04.04 release --- setup2.py | 2 +- setup3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup2.py b/setup2.py index 8c01dd7..9b1ffe3 100755 --- a/setup2.py +++ b/setup2.py @@ -126,7 +126,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2016.06.02", + version="2017.04.04", description="Python language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 7603b00..7f1d51d 100755 --- a/setup3.py +++ b/setup3.py @@ -124,7 +124,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2016.06.02", + version="2017.04.04", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 6423d7469f2137cb70aeaa12aefbeb08f9269ec6 Mon Sep 17 00:00:00 2001 
From: Maarten van Gompel Date: Thu, 9 Nov 2017 22:19:26 +0100 Subject: [PATCH 07/81] Added -std=c++11 , needed for latst timbl --- setup2.py | 4 +++- setup3.py | 3 ++- src/timblapi.cc | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/setup2.py b/setup2.py index 9b1ffe3..cec6b34 100755 --- a/setup2.py +++ b/setup2.py @@ -109,6 +109,8 @@ def build_extensions(self): "-Wl,-Bstatic -l" + boostlib + " -Wl,-Bdynamic".split()) else: ext.libraries.append(boostlib) + + ext.extra_compile_args.extend("-std=c++11") if isinstance(self.compiler, UnixCCompiler) and \ self.static_boost_python: ext.extra_link_args.extend( @@ -126,7 +128,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2017.04.04", + version="2017.11.09", description="Python language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 7f1d51d..9aaea54 100755 --- a/setup3.py +++ b/setup3.py @@ -108,6 +108,7 @@ def build_extensions(self): print("Unable to find boost library",file=sys.stderr) sys.exit(65) + ext.extra_compile_args.extend(["-std=c++11"]) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( "-Wl,-Bstatic -l" + boostlib + " -Wl,-Bdynamic".split()) @@ -124,7 +125,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2017.04.04", + version="2017.11.09", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/src/timblapi.cc b/src/timblapi.cc index 1925db7..65fb509 100755 --- a/src/timblapi.cc +++ b/src/timblapi.cc @@ -54,6 +54,7 @@ #include #include #include +#include #ifndef __clang__ #include From 3fe2d528642e474c7bc69fa5ee03d8b8ce976435 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 26 Feb 2018 21:41:46 +0100 
Subject: [PATCH 08/81] Attempting to make installer more robust for finding boost, libxml2 and timbl dependencies, even on MacOS X + homebrew (migrating some of the burden away from LaMachine) --- setup2.py | 141 +++++++++++++++++++++++++++++++++++++++--------------- setup3.py | 128 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 197 insertions(+), 72 deletions(-) diff --git a/setup2.py b/setup2.py index 9b1ffe3..d85ee33 100755 --- a/setup2.py +++ b/setup2.py @@ -1,6 +1,10 @@ -#!/usr/bin/python +#!/usr/bin/python3 + +import sys import os import shutil +import platform +import glob if os.path.exists('setup2.py'): shutil.copyfile("setup2.py","setup.py") @@ -39,30 +43,106 @@ class BuildExt(build_ext): ("timbl-library-dir=", None, "directory for TiMBL library files"), ("libxml2-include-dir=", None, "directory for LibXML2 files"), ("libxml2-library-dir=", None, "directory for LibXML2 library files"), - ("static-boost-python", "s", "statically link boost-python")] + ("static-boost-python3", "s", "statically link boost-python")] boolean_options = build_ext.boolean_options + [ - "static-boost-python"] + "static-boost-python3"] + def initialize_options(self): build_ext.initialize_options(self) - self.boost_include_dir = "/usr/include" - self.boost_library_dir = "/usr/lib" - self.libxml2_include_dir = "/usr/include/libxml2" - self.libxml2_library_dir = "/usr/lib" - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include/timbl'): - self.timbl_include_dir = os.environ['VIRTUAL_ENV'] + '/include' - self.timbl_library_dir = os.environ['VIRTUAL_ENV'] + '/lib' - elif os.path.exists("/usr/include/timbl"): - self.timbl_include_dir = "/usr/include" - self.timbl_library_dir = "/usr/lib" - elif os.path.exists("/usr/local/include/timbl"): - self.timbl_include_dir = "/usr/local/include" - self.timbl_library_dir = "/usr/local/lib" - else: + pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 + libsearch = 
['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system.lower() + '-gnu', '/usr/local/lib'] + includesearch = ['/usr/include', '/usr/local/include'] + if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): + libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') + if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include'): + includesearch.insert(0, os.environ['VIRTUAL_ENV'] + '/include') + + #Find boost + self.findboost(libsearch, includesearch, pyversion) + + #Find libxml2 + if os.path.exists('/usr/local/Cellar/libxml2'): + #Mac OS X with homebrew + versiondirs = [] + for d in glob.glob('/usr/local/Cellar/libxml2/*'): + if os.path.isdir(d) and d[0] != '.': + versiondirs.append(os.path.basename(d)) + if versiondirs: + versiondirs.sort() + version = versiondirs[0] + libsearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/lib') + includesearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/include') + + for d in includesearch: + if os.path.exists(d + '/libxml2'): + self.libxml2_include_dir = d + '/libxml2' + self.libxml2_library_dir = d.replace('include','lib') + break + + #Find timbl + self.timbl_library_dir = None + for d in includesearch: + if os.path.exists(d + '/timbl'): + self.timbl_include_dir = d + '/timbl' + self.timbl_library_dir = d.replace('include','lib') + break + + if self.timbl_library_dir is None: raise Exception("Timbl not found, make sure to install Timbl and set --timbl-include-dir and --timbl-library-dir appropriately...") + self.static_boost_python = False + def findboost(self, libsearch, includesearch, pyversion): + self.boost_library_dir = None + self.boost_include_dir = None + self.boostlib = "boost_python" + if os.path.exists('/usr/local/Cellar/boost-python'): + #Mac OS X with homebrew + versiondirs = [] + for d in glob.glob('/usr/local/Cellar/boost-python/*'): + if os.path.isdir(d) and d[0] != '.': + versiondirs.append(os.path.basename(d)) + if 
versiondirs: + versiondirs.sort() + version = versiondirs[0] + libsearch.append('/usr/local/Cellar/boost-python/' + version + '/lib') + includesearch.append('/usr/local/Cellar/boost/' + version + '/include') + + for d in libsearch: + if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): + self.boost_library_dir = d + self.boostlib = "boost_python-py" + pyversion + elif os.path.exists(d + "/libboost_python2.so"): + self.boost_library_dir = d + self.boost_library_dir = "boost_python2" + elif os.path.exists(d + "/libboost_python.so"): + #probably goes wrong if this is for python 3! + self.boost_library_dir = d + self.boostlib = "boost_python" + elif os.path.exists(d + "/libboost_python3.dylib"): #Mac OS X + self.boost_library_dir = d + self.boostlib = "boost_python3" + elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X + self.boost_library_dir = d + #probably goes wrong if this is for python 2! + self.boostlib = "boost_python" + for d in includesearch: + if os.path.exists(d + "/boost"): + self.boost_include_path = d + "/boost" + + if self.boost_library_dir is not None: + print >>sys.stderr, "Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")" + else: + print >>sys.stderr, "Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?" + self.boost_library_dir = libsearch[0] + if self.boost_include_dir: + print >>sys.stderr, "Detected boost headers in " + self.boost_include_dir + " (" + self.boostlib +")" + else: + print >>sys.stderr, "Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-incldue-dir" + self.boost_include_dir = includesearch[0] + def finalize_options(self): build_ext.finalize_options(self) self.ensure_file_exists("boost_include_dir", "boost/python.hpp") @@ -92,29 +172,12 @@ def build_extensions(self): ext.library_dirs.append(self.boost_library_dir) ext.library_dirs.append(self.libxml2_library_dir) - pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 27 - if os.path.exists(self.boost_library_dir + "/libboost_python-py"+pyversion+".so"): - boostlib = "boost_python-py" + pyversion - elif os.path.exists(self.boost_library_dir + "/libboost_python2.so"): - boostlib = "boost_python2" - elif os.path.exists(self.boost_library_dir + "/libboost_python.so"): - #probably goes wrong if this is for python 3! - boostlib = "boost_python" - else: - print >>sys.stderr, "Unable to find boost library" - sys.exit(65) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( - "-Wl,-Bstatic -l" + boostlib + " -Wl,-Bdynamic".split()) - else: - ext.libraries.append(boostlib) - if isinstance(self.compiler, UnixCCompiler) and \ - self.static_boost_python: - ext.extra_link_args.extend( - "-Wl,-Bstatic -lboost_python -Wl,-Bdynamic".split()) + "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) else: - ext.libraries.append("boost_python") + ext.libraries.append(self.boostlib) build_ext.build_extensions(self) @@ -126,13 +189,13 @@ def build_extensions(self): setup( name="python-timbl", - version="2017.04.04", - description="Python language binding for the Tilburg Memory-Based Learner", + version="2018.02.26", + description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", url="http://github.com/proycon/python-timbl", + classifiers=["Development Status :: 4 - Beta","Topic :: Text Processing :: Linguistic","Topic :: Scientific/Engineering","Programming 
Language :: Python :: 3","Operating System :: POSIX","Intended Audience :: Developers","Intended Audience :: Science/Research","License :: OSI Approved :: GNU General Public License v3 (GPLv3)"], license="GPL", - classifiers=["Development Status :: 4 - Beta","Topic :: Text Processing :: Linguistic","Topic :: Scientific/Engineering","Programming Language :: Python :: 2.6","Programming Language :: Python :: 2.7","Operating System :: POSIX","Intended Audience :: Developers","Intended Audience :: Science/Research","License :: OSI Approved :: GNU General Public License v3 (GPLv3)"], py_modules=['timbl'], ext_modules=[timblModule], cmdclass={"build_ext": BuildExt}) diff --git a/setup3.py b/setup3.py index 7f1d51d..baec548 100755 --- a/setup3.py +++ b/setup3.py @@ -3,6 +3,8 @@ import sys import os import shutil +import platform +import glob if os.path.exists('setup3.py'): shutil.copyfile("setup3.py","setup.py") @@ -43,25 +45,101 @@ class BuildExt(build_ext): boolean_options = build_ext.boolean_options + [ "static-boost-python3"] + def initialize_options(self): build_ext.initialize_options(self) - self.boost_include_dir = "/usr/include" - self.boost_library_dir = "/usr/lib" - self.libxml2_include_dir = "/usr/include/libxml2" - self.libxml2_library_dir = "/usr/lib" - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include/timbl'): - self.timbl_include_dir = os.environ['VIRTUAL_ENV'] + '/include' - self.timbl_library_dir = os.environ['VIRTUAL_ENV'] + '/lib' - elif os.path.exists("/usr/include/timbl"): - self.timbl_include_dir = "/usr/include" - self.timbl_library_dir = "/usr/lib" - elif os.path.exists("/usr/local/include/timbl"): - self.timbl_include_dir = "/usr/local/include" - self.timbl_library_dir = "/usr/local/lib" - else: + pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 + libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system.lower() + '-gnu', '/usr/local/lib'] + includesearch 
= ['/usr/include', '/usr/local/include'] + if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): + libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') + if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include'): + includesearch.insert(0, os.environ['VIRTUAL_ENV'] + '/include') + + #Find boost + self.findboost(libsearch, includesearch, pyversion) + + #Find libxml2 + if os.path.exists('/usr/local/Cellar/libxml2'): + #Mac OS X with homebrew + versiondirs = [] + for d in glob.glob('/usr/local/Cellar/libxml2/*'): + if os.path.isdir(d) and d[0] != '.': + versiondirs.append(os.path.basename(d)) + if versiondirs: + versiondirs.sort() + version = versiondirs[0] + libsearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/lib') + includesearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/include') + + for d in includesearch: + if os.path.exists(d + '/libxml2'): + self.libxml2_include_dir = d + '/libxml2' + self.libxml2_library_dir = d.replace('include','lib') + break + + #Find timbl + self.timbl_library_dir = None + for d in includesearch: + if os.path.exists(d + '/timbl'): + self.timbl_include_dir = d + '/timbl' + self.timbl_library_dir = d.replace('include','lib') + break + + if self.timbl_library_dir is None: raise Exception("Timbl not found, make sure to install Timbl and set --timbl-include-dir and --timbl-library-dir appropriately...") + self.static_boost_python = False + def findboost(self, libsearch, includesearch, pyversion): + self.boost_library_dir = None + self.boost_include_dir = None + self.boostlib = "boost_python" + if os.path.exists('/usr/local/Cellar/boost-python'): + #Mac OS X with homebrew + versiondirs = [] + for d in glob.glob('/usr/local/Cellar/boost-python/*'): + if os.path.isdir(d) and d[0] != '.': + versiondirs.append(os.path.basename(d)) + if versiondirs: + versiondirs.sort() + version = versiondirs[0] + libsearch.append('/usr/local/Cellar/boost-python/' + version + 
'/lib') + includesearch.append('/usr/local/Cellar/boost/' + version + '/include') + + for d in libsearch: + if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): + self.boost_library_dir = d + self.boostlib = "boost_python-py" + pyversion + elif os.path.exists(d + "/libboost_python3.so"): + self.boost_library_dir = d + self.boost_library_dir = "boost_python3" + elif os.path.exists(d + "/libboost_python.so"): + #probably goes wrong if this is for python 2! + self.boost_library_dir = d + self.boostlib = "boost_python" + elif os.path.exists(d + "/libboost_python3.dylib"): #Mac OS X + self.boost_library_dir = d + self.boostlib = "boost_python3" + elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X + self.boost_library_dir = d + #probably goes wrong if this is for python 2! + self.boostlib = "boost_python" + for d in includesearch: + if os.path.exists(d + "/boost"): + self.boost_include_path = d + "/boost" + + if self.boost_library_dir is not None: + print("Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")",file=sys.stderr) + else: + print("Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?",file=sys.stderr) + self.boost_library_dir = libsearch[0] + if self.boost_include_dir: + print("Detected boost headers in " + self.boost_include_dir + " (" + self.boostlib +")",file=sys.stderr) + else: + print("Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-incldue-dir",file=sys.stderr) + self.boost_include_dir = includesearch[0] + def finalize_options(self): build_ext.finalize_options(self) self.ensure_file_exists("boost_include_dir", "boost/python.hpp") @@ -91,28 +169,12 @@ def build_extensions(self): ext.library_dirs.append(self.boost_library_dir) ext.library_dirs.append(self.libxml2_library_dir) - pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - if os.path.exists(self.boost_library_dir + "/libboost_python-py"+pyversion+".so"): - boostlib = "boost_python-py" + pyversion - elif os.path.exists(self.boost_library_dir + "/libboost_python3.so"): - boostlib = "boost_python3" - elif os.path.exists(self.boost_library_dir + "/libboost_python.so"): - #probably goes wrong if this is for python 2! - boostlib = "boost_python" - elif os.path.exists(self.boost_library_dir + "/libboost_python3.dylib"): #Mac OS X - boostlib = "boost_python3" - elif os.path.exists(self.boost_library_dir + "/libboost_python.dylib"): #Mac OS X - #probably goes wrong if this is for python 2! 
- boostlib = "boost_python" - else: - print("Unable to find boost library",file=sys.stderr) - sys.exit(65) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( - "-Wl,-Bstatic -l" + boostlib + " -Wl,-Bdynamic".split()) + "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) else: - ext.libraries.append(boostlib) + ext.libraries.append(self.boostlib) build_ext.build_extensions(self) @@ -124,7 +186,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2017.04.04", + version="2018.02.26", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From cd0689a099dffe7794f6a57c827e3112ca36de52 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 27 Feb 2018 11:54:04 +0100 Subject: [PATCH 09/81] fixes --- setup2.py | 14 +++++++------- setup3.py | 15 +++++++-------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/setup2.py b/setup2.py index 36e8a62..96b7690 100755 --- a/setup2.py +++ b/setup2.py @@ -52,7 +52,7 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system.lower() + '-gnu', '/usr/local/lib'] + libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') @@ -85,7 +85,7 @@ def initialize_options(self): self.timbl_library_dir = None for d in includesearch: if os.path.exists(d + '/timbl'): - self.timbl_include_dir = d + '/timbl' + self.timbl_include_dir = d self.timbl_library_dir = 
d.replace('include','lib') break @@ -116,7 +116,7 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python-py" + pyversion elif os.path.exists(d + "/libboost_python2.so"): self.boost_library_dir = d - self.boost_library_dir = "boost_python2" + self.boostlib = "boost_python2" elif os.path.exists(d + "/libboost_python.so"): #probably goes wrong if this is for python 3! self.boost_library_dir = d @@ -130,7 +130,7 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python" for d in includesearch: if os.path.exists(d + "/boost"): - self.boost_include_path = d + "/boost" + self.boost_include_dir = d if self.boost_library_dir is not None: print >>sys.stderr, "Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")" @@ -138,9 +138,9 @@ def findboost(self, libsearch, includesearch, pyversion): print >>sys.stderr, "Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?" self.boost_library_dir = libsearch[0] if self.boost_include_dir: - print >>sys.stderr, "Detected boost headers in " + self.boost_include_dir + " (" + self.boostlib +")" + print >>sys.stderr, "Detected boost headers in " + self.boost_include_dir else: - print >>sys.stderr, "Unable to find boost headers automatically. Is libboost-python-dev installed? Set --boost-incldue-dir" + print >>sys.stderr, "Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-include-dir" self.boost_include_dir = includesearch[0] def finalize_options(self): @@ -173,13 +173,13 @@ def build_extensions(self): ext.library_dirs.append(self.libxml2_library_dir) + ext.extra_compile_args.extend(["-std=c++11"]) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) else: ext.libraries.append(self.boostlib) - ext.extra_compile_args.extend("-std=c++11") build_ext.build_extensions(self) diff --git a/setup3.py b/setup3.py index 87f85b0..35861b2 100755 --- a/setup3.py +++ b/setup3.py @@ -49,7 +49,7 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system.lower() + '-gnu', '/usr/local/lib'] + libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') @@ -82,7 +82,7 @@ def initialize_options(self): self.timbl_library_dir = None for d in includesearch: if os.path.exists(d + '/timbl'): - self.timbl_include_dir = d + '/timbl' + self.timbl_include_dir = d self.timbl_library_dir = d.replace('include','lib') break @@ -113,7 +113,7 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python-py" + pyversion elif os.path.exists(d + "/libboost_python3.so"): self.boost_library_dir = d - self.boost_library_dir = "boost_python3" + self.boostlib = "boost_python3" elif os.path.exists(d + "/libboost_python.so"): #probably goes wrong if this is for python 2! 
self.boost_library_dir = d @@ -127,17 +127,17 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python" for d in includesearch: if os.path.exists(d + "/boost"): - self.boost_include_path = d + "/boost" + self.boost_include_dir = d if self.boost_library_dir is not None: print("Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")",file=sys.stderr) else: print("Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?",file=sys.stderr) self.boost_library_dir = libsearch[0] - if self.boost_include_dir: - print("Detected boost headers in " + self.boost_include_dir + " (" + self.boostlib +")",file=sys.stderr) + if self.boost_include_dir is not None: + print("Detected boost headers in " + self.boost_include_dir ,file=sys.stderr) else: - print("Unable to find boost headers automatically. Is libboost-python-dev installed? Set --boost-incldue-dir",file=sys.stderr) + print("Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-include-dir",file=sys.stderr) self.boost_include_dir = includesearch[0] def finalize_options(self): @@ -177,7 +177,6 @@ def build_extensions(self): else: ext.libraries.append(self.boostlib) - ext.extra_compile_args.extend("-std=c++11") build_ext.build_extensions(self) From 776abdf6de245bd4c9ae79004b2f302fd349b409 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 27 Feb 2018 11:54:34 +0100 Subject: [PATCH 10/81] version bump --- setup2.py | 2 +- setup3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup2.py b/setup2.py index 96b7690..2204fd3 100755 --- a/setup2.py +++ b/setup2.py @@ -191,7 +191,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2018.02.26", + version="2018.02.27", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 35861b2..56dacdb 100755 --- a/setup3.py +++ b/setup3.py @@ -188,7 +188,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2018.02.26", + version="2018.02.27", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From cda8bd6685e3300f7e403b613d5a53497303587c Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Sat, 3 Mar 2018 12:51:45 +0100 Subject: [PATCH 11/81] Explicitly use libc++ on mac --- setup2.py | 7 +++++-- setup3.py | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/setup2.py b/setup2.py index 2204fd3..3f6d7bd 100755 --- a/setup2.py +++ b/setup2.py @@ -173,7 +173,10 @@ def build_extensions(self): ext.library_dirs.append(self.libxml2_library_dir) - ext.extra_compile_args.extend(["-std=c++11"]) + compile_args = ["-std=c++11"] + if platform.system() == "Darwin": + compile_args.append("-stdlib=libc++") + 
ext.extra_compile_args.extend(compile_args) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) @@ -191,7 +194,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2018.02.27", + version="2018.03.03", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 56dacdb..fdb80c0 100755 --- a/setup3.py +++ b/setup3.py @@ -169,8 +169,10 @@ def build_extensions(self): ext.library_dirs.append(self.boost_library_dir) ext.library_dirs.append(self.libxml2_library_dir) - - ext.extra_compile_args.extend(["-std=c++11"]) + compile_args = ["-std=c++11"] + if platform.system() == "Darwin": + compile_args.append("-stdlib=libc++") + ext.extra_compile_args.extend(compile_args) if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: ext.extra_link_args.extend( "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) @@ -188,7 +190,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2018.02.27", + version="2018.03.03", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From d438e4d65cbff77143866f94c841688f22f6377a Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 7 Mar 2018 11:59:36 +0100 Subject: [PATCH 12/81] fixing boost detection (for Mac OS X again...) 
--- setup2.py | 23 +++++++++-------------- setup3.py | 23 ++++++++++++----------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/setup2.py b/setup2.py index 3f6d7bd..035563f 100755 --- a/setup2.py +++ b/setup2.py @@ -98,36 +98,31 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None self.boost_include_dir = None self.boostlib = "boost_python" - if os.path.exists('/usr/local/Cellar/boost-python'): + if os.path.exists('/usr/local/opt/boost-python'): #Mac OS X with homebrew - versiondirs = [] - for d in glob.glob('/usr/local/Cellar/boost-python/*'): - if os.path.isdir(d) and d[0] != '.': - versiondirs.append(os.path.basename(d)) - if versiondirs: - versiondirs.sort() - version = versiondirs[0] - libsearch.append('/usr/local/Cellar/boost-python/' + version + '/lib') - includesearch.append('/usr/local/Cellar/boost/' + version + '/include') + libsearch.insert(0,'/usr/local/opt/boost-python/lib') + libsearch.insert(0,'/usr/local/opt/boost/lib') + includesearch.insert(0,'/usr/local/opt/boost/include') for d in libsearch: if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): self.boost_library_dir = d self.boostlib = "boost_python-py" + pyversion + break elif os.path.exists(d + "/libboost_python2.so"): self.boost_library_dir = d self.boostlib = "boost_python2" + break elif os.path.exists(d + "/libboost_python.so"): #probably goes wrong if this is for python 3! self.boost_library_dir = d self.boostlib = "boost_python" - elif os.path.exists(d + "/libboost_python3.dylib"): #Mac OS X - self.boost_library_dir = d - self.boostlib = "boost_python3" + break elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X self.boost_library_dir = d #probably goes wrong if this is for python 2! 
self.boostlib = "boost_python" + break for d in includesearch: if os.path.exists(d + "/boost"): self.boost_include_dir = d @@ -194,7 +189,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2018.03.03", + version="2018.03.07", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index fdb80c0..eee3455 100755 --- a/setup3.py +++ b/setup3.py @@ -95,39 +95,40 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None self.boost_include_dir = None self.boostlib = "boost_python" - if os.path.exists('/usr/local/Cellar/boost-python'): + if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew - versiondirs = [] - for d in glob.glob('/usr/local/Cellar/boost-python/*'): - if os.path.isdir(d) and d[0] != '.': - versiondirs.append(os.path.basename(d)) - if versiondirs: - versiondirs.sort() - version = versiondirs[0] - libsearch.append('/usr/local/Cellar/boost-python/' + version + '/lib') - includesearch.append('/usr/local/Cellar/boost/' + version + '/include') + self.boostlib = "boost_python3" + libsearch.insert(0,'/usr/local/opt/boost-python3/lib') + libsearch.insert(0,'/usr/local/opt/boost/lib') + includesearch.insert(0,'/usr/local/opt/boost/include') for d in libsearch: if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): self.boost_library_dir = d self.boostlib = "boost_python-py" + pyversion + break elif os.path.exists(d + "/libboost_python3.so"): self.boost_library_dir = d self.boostlib = "boost_python3" + break elif os.path.exists(d + "/libboost_python.so"): #probably goes wrong if this is for python 2! 
self.boost_library_dir = d self.boostlib = "boost_python" + break elif os.path.exists(d + "/libboost_python3.dylib"): #Mac OS X self.boost_library_dir = d self.boostlib = "boost_python3" + break elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X self.boost_library_dir = d #probably goes wrong if this is for python 2! self.boostlib = "boost_python" + break for d in includesearch: if os.path.exists(d + "/boost"): self.boost_include_dir = d + break if self.boost_library_dir is not None: print("Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")",file=sys.stderr) @@ -190,7 +191,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2018.03.03", + version="2018.03.07", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From e65504914c559175042f7ef74557cb0cba57ebf3 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 7 Mar 2018 12:01:52 +0100 Subject: [PATCH 13/81] force recompilation --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..b24295f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[build_ext] +force=1 From b7d5df7cba9992cbd11d8d559881488ca0019a69 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 23 Apr 2018 12:38:08 +0200 Subject: [PATCH 14/81] yet another round of fixes for Mac OS X compilation --- setup2.py | 6 +++++- setup3.py | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/setup2.py b/setup2.py index 035563f..4e4e8d1 100755 --- a/setup2.py +++ b/setup2.py @@ -118,6 +118,10 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = d self.boostlib = "boost_python" break + elif os.path.exists(d + "/libboost_python" + pyversion + ".dylib"): #Mac OS X + self.boost_library_dir = d + self.boostlib = 
"boost_python" + pyversion + break elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X self.boost_library_dir = d #probably goes wrong if this is for python 2! @@ -189,7 +193,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2018.03.07", + version="2018.04.23", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index eee3455..a38ab89 100755 --- a/setup3.py +++ b/setup3.py @@ -97,7 +97,6 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python" if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew - self.boostlib = "boost_python3" libsearch.insert(0,'/usr/local/opt/boost-python3/lib') libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') @@ -116,6 +115,10 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = d self.boostlib = "boost_python" break + elif os.path.exists(d + "/libboost_python" + pyversion + ".dylib"): #Mac OS X + self.boost_library_dir = d + self.boostlib = "boost_python" + pyversion + break elif os.path.exists(d + "/libboost_python3.dylib"): #Mac OS X self.boost_library_dir = d self.boostlib = "boost_python3" @@ -191,7 +194,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2018.03.07", + version="2018.04.23", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 550d6a6ee1402d6e6f83dfe3ffbd8c98b1c6e401 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 2018 08:28:05 +0200 Subject: [PATCH 15/81] added sk_timbl adjustments --- timbl.py | 55 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/timbl.py 
b/timbl.py index fea88ae..01a4d5a 100644 --- a/timbl.py +++ b/timbl.py @@ -21,6 +21,7 @@ stderr = sys.stderr stdout = sys.stdout +from tempfile import mktemp import timblapi import io import os @@ -59,15 +60,19 @@ def u(s, encoding = 'utf-8', errors='strict'): class TimblClassifier(object): - def __init__(self, fileprefix, timbloptions, format = "Tabbed", dist=True, encoding = 'utf-8', overwrite = True, flushthreshold=10000, threading=False, normalize=True, debug=False): + def __init__(self, fileprefix, timbloptions, format = "Tabbed", dist=True, encoding = 'utf-8', overwrite = True, flushthreshold=10000, threading=False, normalize=True, debug=False, sklearn=False, flushdir=None): if format.lower() == "tabbed": self.format = "Tabbed" self.delimiter = "\t" elif format.lower() == "columns": self.format = "Columns" self.delimiter = " " + elif format.lower() == 'sparse': # for sparse arrays, e.g. scipy.sparse.csr + self.format = "Sparse" + self.delimiter = "" else: - raise ValueError("Only Tabbed and Columns are supported input format for the python wrapper, not " + format) + raise ValueError("Only Tabbed, Columns, and Sparse are supported input format for the python wrapper, not " + format) + self.timbloptions = timbloptions self.fileprefix = fileprefix @@ -81,10 +86,14 @@ def __init__(self, fileprefix, timbloptions, format = "Tabbed", dist=True, encod self.api = None self.debug = debug - if os.path.exists(self.fileprefix + ".train") and overwrite: - self.flushed = 0 + if sklearn: + import scipy as sp + self.flushfile = mktemp(prefix=self.fileprefix, dir=flushdir) else: - self.flushed = 1 + if os.path.exists(self.fileprefix + ".train") and overwrite: + self.flushed = 0 + else: + self.flushed = 1 self.threading = threading @@ -94,8 +103,10 @@ def validatefeatures(self,features): for feature in features: if isinstance(feature, int) or isinstance(feature, float): validatedfeatures.append( str(feature) ) - elif self.delimiter in feature: + elif self.delimiter in feature 
and not sklearn: raise ValueError("Feature contains delimiter: " + feature) + elif sklearn and isinstance(feature, str): #then is sparse added together + validatedfeatures.append(feature) else: validatedfeatures.append(feature) return validatedfeatures @@ -106,10 +117,10 @@ def append(self, features, classlabel): features = self.validatefeatures(features) - if self.delimiter in classlabel: + if self.delimiter in classlabel and self.delimiter != '': raise ValueError("Class label contains delimiter: " + self.delimiter) - self.instances.append(self.delimiter.join(features) + self.delimiter + classlabel) + self.instances.append(self.delimiter.join(features) + (self.delimiter if not self.delimiter == '' else ' ') + classlabel) if len(self.instances) >= self.flushthreshold: self.flush() @@ -117,10 +128,13 @@ def flush(self): if self.debug: print("Flushing...",file=sys.stderr) if len(self.instances) == 0: return False - if self.flushed: - f = io.open(self.fileprefix + ".train",'a', encoding=self.encoding) + if hasattr(self, 'flushfile'): + f = io.open(self.flushfile,'w', encoding=self.encoding) else: - f = io.open(self.fileprefix + ".train",'w', encoding=self.encoding) + if self.flushed: + f = io.open(self.fileprefix + ".train",'a', encoding=self.encoding) + else: + f = io.open(self.fileprefix + ".train",'w', encoding=self.encoding) for instance in self.instances: f.write(instance + "\n") @@ -135,8 +149,18 @@ def __delete__(self): def train(self, save=False): self.flush() - if not os.path.exists(self.fileprefix + ".train"): - raise LoadException("Training file '"+self.fileprefix+".train' not found. Did you forget to add instances with append()?") + + if hasattr(self, 'flushfile'): + if not os.path.exists(self.flushfile): + raise LoadException("Training file '"+self.flushfile+"' not found. 
Did you forget to add instances with append()?") + else: + filepath = self.flushfile + else: + if not os.path.exists(self.fileprefix + ".train"): + raise LoadException("Training file '"+self.fileprefix+".train' not found. Did you forget to add instances with append()?") + else: + filepath = self.fileprefix + '.train' + options = "-F " + self.format + " " + self.timbloptions if self.dist: options += " +v+db +v+di" @@ -149,7 +173,7 @@ def train(self, save=False): print("Enabling debug for timblapi",file=stderr) self.api.enableDebug() - trainfile = self.fileprefix + ".train" + trainfile = filepath self.api.learn(b(trainfile)) if save: self.save() @@ -168,7 +192,8 @@ def classify(self, features, allowtopdistribution=True): if not self.api: self.load() - testinstance = self.delimiter.join(features) + self.delimiter + "?" + + testinstance = self.delimiter.join(features) + (self.delimiter if not self.delimiter == '' else ' ') + "?" if self.dist: if self.threading: result, cls, distribution, distance = self.api.classify3safe(b(testinstance), self.normalize, int(not allowtopdistribution)) From fb9316e7f86ecb1ac9d82cc04dcc5f6569924830 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 2018 08:28:58 +0200 Subject: [PATCH 16/81] added sk_timbl.py --- sk_timbl.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 sk_timbl.py diff --git a/sk_timbl.py b/sk_timbl.py new file mode 100644 index 0000000..544d9e3 --- /dev/null +++ b/sk_timbl.py @@ -0,0 +1,89 @@ +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils import check_X_y, check_array +import scipy as sp +import numpy as np + +class skTiMBL(BaseEstimator, ClassifierMixin): + def __init__(self, prefix='timbl', algorithm=0, dist_metric=None, + k=1, normalize=False, debug=0): + self.prefix = prefix + self.algorithm = algorithm + self.dist_metric = dist_metric + self.k = k + self.normalize = normalize + self.debug = debug + + 
self.classifier = TimblCl(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), + debug=True, flushthreshold=20000) + + def fit(self, X, y=None): + X, y = check_X_y(X, y, dtype=np.int64, accept_sparse='csr') + + n_rows = X.shape[0] + if sp.sparse.issparse(X): + if self.debug: print('Features are sparse, choosing faster learning') + + self.classifier = TimblCl(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), + format='Sparse', debug=True, flushthreshold=20000) + + for i in range(n_rows): + sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] + self.classifier.append(sparse,str(y[i])) + + else: + if y.dtype != 'O': + y = y.astype(str) + + for i in range(n_rows): + self.classifier.append(list(X[i].toarray()[0]), y[i]) + + self.classifier.train() + return self + + + def predict(self, X, y=None): + X = check_array(X, dtype=np.int64, accept_sparse='csr') + + n_samples = X.shape[0] + pred = [] + + if sp.sparse.issparse(X): + if self.debug: print('Features are sparse, choosing faster predictions') + + for i in range(n_samples): + sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] + y_pred,_, distance = self.classifier.classify(sparse) + pred.append(np.int64(y_pred)) + + else: + for i in range(n_samples): + y_pred,_, distance = self.classifier.classify(list(X[i].toarray()[0])) + pred.append(np.int64(y_pred)) + + return pred + + + def predict_proba(self, X, y=None): + X = check_array(X, dtype=np.float64, accept_sparse='csr') + + n_samples = X.shape[0] + + pred = [] + + if sp.sparse.issparse(X): + print('Features are sparse, choosing faster predictions') + + for i in range(n_samples): + sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] + _,dist, distance = self.classifier.classify(sparse) + pred.append(np.int64(dist)) + + else: + for i in range(n_samples): + _,proba, distance = self.classifier.classify(list(X[i].toarray()[0])) + pred.append(np.float(proba)) + + return 
pred + + def remove_flushfile(self): + self.classifier.remove_flush() From 66b710a87954b0a1d5f0496d98c19ec11d38e07b Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 2018 08:35:19 +0200 Subject: [PATCH 17/81] removed removal of flushfile from /tmp --- sk_timbl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sk_timbl.py b/sk_timbl.py index 544d9e3..fac12d8 100644 --- a/sk_timbl.py +++ b/sk_timbl.py @@ -85,5 +85,3 @@ def predict_proba(self, X, y=None): return pred - def remove_flushfile(self): - self.classifier.remove_flush() From 7143dd49ac80311c70b3585309d4e16ad68322c9 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 2018 09:35:02 +0200 Subject: [PATCH 18/81] fixed classifier name --- sk_timbl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sk_timbl.py b/sk_timbl.py index fac12d8..ab9a1fd 100644 --- a/sk_timbl.py +++ b/sk_timbl.py @@ -1,5 +1,6 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils import check_X_y, check_array +from timbl import TimblClassifier import scipy as sp import numpy as np @@ -13,7 +14,7 @@ def __init__(self, prefix='timbl', algorithm=0, dist_metric=None, self.normalize = normalize self.debug = debug - self.classifier = TimblCl(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), + self.classifier = TimblClassifier(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), debug=True, flushthreshold=20000) def fit(self, X, y=None): @@ -23,7 +24,7 @@ def fit(self, X, y=None): if sp.sparse.issparse(X): if self.debug: print('Features are sparse, choosing faster learning') - self.classifier = TimblCl(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), + self.classifier = TimblClassifier(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), format='Sparse', debug=True, flushthreshold=20000) for i in range(n_rows): From 88111335ede65c40ae4b402d622f0ae9754d8755 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 
2018 09:54:59 +0200 Subject: [PATCH 19/81] fixes --- sk_timbl.py | 4 ++-- timbl.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sk_timbl.py b/sk_timbl.py index ab9a1fd..ad821bb 100644 --- a/sk_timbl.py +++ b/sk_timbl.py @@ -15,7 +15,7 @@ def __init__(self, prefix='timbl', algorithm=0, dist_metric=None, self.debug = debug self.classifier = TimblClassifier(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), - debug=True, flushthreshold=20000) + debug=True, sklearn=True, flushthreshold=20000) def fit(self, X, y=None): X, y = check_X_y(X, y, dtype=np.int64, accept_sparse='csr') @@ -25,7 +25,7 @@ def fit(self, X, y=None): if self.debug: print('Features are sparse, choosing faster learning') self.classifier = TimblClassifier(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), - format='Sparse', debug=True, flushthreshold=20000) + format='Sparse', debug=True, sklearn=True, flushthreshold=20000) for i in range(n_rows): sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] diff --git a/timbl.py b/timbl.py index 01a4d5a..99facc3 100644 --- a/timbl.py +++ b/timbl.py @@ -85,10 +85,12 @@ def __init__(self, fileprefix, timbloptions, format = "Tabbed", dist=True, encod self.instances = [] self.api = None self.debug = debug + self.sklearn = sklearn if sklearn: import scipy as sp self.flushfile = mktemp(prefix=self.fileprefix, dir=flushdir) + self.flushed = 0 else: if os.path.exists(self.fileprefix + ".train") and overwrite: self.flushed = 0 @@ -103,9 +105,9 @@ def validatefeatures(self,features): for feature in features: if isinstance(feature, int) or isinstance(feature, float): validatedfeatures.append( str(feature) ) - elif self.delimiter in feature and not sklearn: + elif self.delimiter in feature and not self.sklearn: raise ValueError("Feature contains delimiter: " + feature) - elif sklearn and isinstance(feature, str): #then is sparse added together + elif self.sklearn and isinstance(feature, 
str): #then is sparse added together validatedfeatures.append(feature) else: validatedfeatures.append(feature) From 7aadf10758f1f62b6e893e2f40e6ee772c0014c3 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Mon, 14 May 2018 10:17:57 +0200 Subject: [PATCH 20/81] flushdir fix --- sk_timbl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sk_timbl.py b/sk_timbl.py index ad821bb..0be723f 100644 --- a/sk_timbl.py +++ b/sk_timbl.py @@ -6,16 +6,17 @@ class skTiMBL(BaseEstimator, ClassifierMixin): def __init__(self, prefix='timbl', algorithm=0, dist_metric=None, - k=1, normalize=False, debug=0): + k=1, normalize=False, debug=0, flushdir=None): self.prefix = prefix self.algorithm = algorithm self.dist_metric = dist_metric self.k = k self.normalize = normalize self.debug = debug + self.flushdir = flushdir self.classifier = TimblClassifier(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), - debug=True, sklearn=True, flushthreshold=20000) + debug=True, sklearn=True, flushdir=self.flushdir, flushthreshold=20000) def fit(self, X, y=None): X, y = check_X_y(X, y, dtype=np.int64, accept_sparse='csr') @@ -25,7 +26,7 @@ def fit(self, X, y=None): if self.debug: print('Features are sparse, choosing faster learning') self.classifier = TimblClassifier(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), - format='Sparse', debug=True, sklearn=True, flushthreshold=20000) + format='Sparse', debug=True, sklearn=True, flushdir=self.flushdir flushthreshold=20000) for i in range(n_rows): sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] From 507f3c0ec81016b0ec6993a04f4ba8da753e44ba Mon Sep 17 00:00:00 2001 From: Duchadian Date: Tue, 15 May 2018 16:43:05 +0200 Subject: [PATCH 21/81] added utils file --- sk_timbl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sk_timbl.py b/sk_timbl.py index 0be723f..fbfa9fd 100644 --- a/sk_timbl.py +++ b/sk_timbl.py @@ -5,7 +5,7 @@ import numpy as np class 
skTiMBL(BaseEstimator, ClassifierMixin): - def __init__(self, prefix='timbl', algorithm=0, dist_metric=None, + def __init__(self, prefix='timbl', algorithm=4, dist_metric=None, k=1, normalize=False, debug=0, flushdir=None): self.prefix = prefix self.algorithm = algorithm From 72016b725b7ee0212be72be429afaeedad2d8530 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Tue, 15 May 2018 16:44:34 +0200 Subject: [PATCH 22/81] added utils and modified timbl.py --- timbl.py | 4 -- utils.py | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 4 deletions(-) create mode 100644 utils.py diff --git a/timbl.py b/timbl.py index 99facc3..5280c95 100644 --- a/timbl.py +++ b/timbl.py @@ -375,7 +375,3 @@ def _parsedistribution(self, instance, start=0, end =None): return dist - - - - diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..8a2371a --- /dev/null +++ b/utils.py @@ -0,0 +1,166 @@ +import pandas as pd +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline +from numpy import mean, std +from pprint import pprint + +class EstimatorSelectionHelper: + def __init__(self, models, params, pipe, refit=False, memory=None): + if not set(models.keys()).issubset(set(params.keys())): + missing_params = list(set(models.keys()) - set(params.keys())) + raise ValueError("Some estimators are missing parameters: %s" % missing_params) + self.models = models + self.params = params + self.pipe = pipe + self.model = None + self.refit = refit + self.memory = memory + self.keys = models.keys() + self.grid_searches = {} + + def fill_grid_searches(self, prev_searches): + self.grid_searches = prev_searches + + def fit(self, X, y, cv=10, n_jobs=1, verbose=1, scoring=None): + for key in self.keys: + if not key in self.grid_searches: + print("Running GridSearchCV for %s." 
% key) + model = self.models[key] + params = self.params[key] + + if self.pipe: + steps = list(x for x in self.pipe) + steps.append((key, model)) + self.model = Pipeline(steps, memory=self.memory) + + gs = GridSearchCV(self.model, params, cv=cv, + n_jobs=n_jobs if not key in ['KNN', 'TIMBL'] else 2, + verbose=verbose, scoring=scoring, refit=self.refit) + gs.fit(X,y) + self.grid_searches[key] = gs + +def join_params(param1, param2): + temp = param1.copy() + temp.update(param2) + return temp + +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils import check_X_y, check_array +from dev_timbl import TimblClassifier +import scipy as sp +import numpy as np + +class skTiMBL(BaseEstimator, ClassifierMixin): + def __init__(self, prefix='timbl', algorithm=4, dist_metric=None, + k=1, normalize=False, debug=0, flushdir=None): + self.prefix = prefix + self.algorithm = algorithm + self.dist_metric = dist_metric + self.k = k + self.normalize = normalize + self.debug = debug + self.flushdir = flushdir + + + def _make_timbl_options(self, *options): + """ + -a algorithm + -m metric + -w weighting + -k amount of neighbours + -d class voting weights + -L frequency threshold + -T which feature index is label + -N max number of features + -H turn hashing on/off + + """ + pass + + + def fit(self, X, y): + X, y = check_X_y(X, y, dtype=np.int64, accept_sparse='csr') + + n_rows = X.shape[0] + self.classes_ = np.unique(y) + + if sp.sparse.issparse(X): + if self.debug: print('Features are sparse, choosing faster learning') + + self.classifier = TimblClassifier(self.prefix, "-a{} -k{} -N{} -vf".format(self.algorithm,self.k, X.shape[1]), + format='Sparse', debug=True, sklearn=True, flushdir=self.flushdir, + flushthreshold=20000, normalize=self.normalize) + + for i in range(n_rows): + sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] + self.classifier.append(sparse,str(y[i])) + + else: + + self.classifier = TimblClassifier(self.prefix, "-a{} -k{} 
-N{} -vf".format(self.algorithm, self.k, X.shape[1]), + debug=True, sklearn=True, flushdir=self.flushdir, flushthreshold=20000, + normalize=self.normalize) + + if y.dtype != 'O': + y = y.astype(str) + + for i in range(n_rows): + self.classifier.append(list(X[i].toarray()[0]), y[i]) + + self.classifier.train() + return self + + + def _timbl_predictions(self, X, part_index, y=None): + choices = {0 : lambda x : x.append(np.int64(label)), + 1 : lambda x : x.append([np.float(distance)]), + } + X = check_array(X, dtype=np.float64, accept_sparse='csr') + + n_samples = X.shape[0] + + pred = [] + func = choices[part_index] + if sp.sparse.issparse(X): + if self.debug: print('Features are sparse, choosing faster predictions') + + for i in range(n_samples): + sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] + label,proba, distance = self.classifier.classify(sparse) + func(pred) + + else: + for i in range(n_samples): + label,proba, distance = self.classifier.classify(list(X[i].toarray()[0])) + func(pred) + + return np.array(pred) + + + + def predict(self, X, y=None): + return self._timbl_predictions(X, part_index=0) + + + def predict_proba(self, X, y=None): + """ + TIMBL is a discrete classifier. It cannot give probability estimations. + To ensure that scikit-learn functions with TIMBL (and especially metrics + such as ROC_AUC), this method is implemented. + + For ROC_AUC, the classifier corresponds to a single point in ROC space, + instead of a probabilistic continuum such as classifiers that can give + a probability estimation (e.g. Linear classifiers). For an explanation, + see Fawcett (2005). + """" + return predict(X) + + + def decision_function(self, X, y=None): + """ + The decision function is interpreted here as being the distance between + the instance that is being classified and the nearest point in k space. 
+ """ + return self._timbl_predictions(X, part_index=1) + + From ffb847c6f83146c38fd40aa1fafac747742272f5 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Tue, 15 May 2018 19:50:39 +0200 Subject: [PATCH 23/81] moved sktimbl to utils.py --- sk_timbl.py | 89 ----------------------------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 sk_timbl.py diff --git a/sk_timbl.py b/sk_timbl.py deleted file mode 100644 index fbfa9fd..0000000 --- a/sk_timbl.py +++ /dev/null @@ -1,89 +0,0 @@ -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils import check_X_y, check_array -from timbl import TimblClassifier -import scipy as sp -import numpy as np - -class skTiMBL(BaseEstimator, ClassifierMixin): - def __init__(self, prefix='timbl', algorithm=4, dist_metric=None, - k=1, normalize=False, debug=0, flushdir=None): - self.prefix = prefix - self.algorithm = algorithm - self.dist_metric = dist_metric - self.k = k - self.normalize = normalize - self.debug = debug - self.flushdir = flushdir - - self.classifier = TimblClassifier(self.prefix, "-a {} -k {}".format(self.algorithm, self.k), - debug=True, sklearn=True, flushdir=self.flushdir, flushthreshold=20000) - - def fit(self, X, y=None): - X, y = check_X_y(X, y, dtype=np.int64, accept_sparse='csr') - - n_rows = X.shape[0] - if sp.sparse.issparse(X): - if self.debug: print('Features are sparse, choosing faster learning') - - self.classifier = TimblClassifier(self.prefix, "-a {} -k {} -N {}".format(self.algorithm,self.k, X.shape[1]), - format='Sparse', debug=True, sklearn=True, flushdir=self.flushdir flushthreshold=20000) - - for i in range(n_rows): - sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] - self.classifier.append(sparse,str(y[i])) - - else: - if y.dtype != 'O': - y = y.astype(str) - - for i in range(n_rows): - self.classifier.append(list(X[i].toarray()[0]), y[i]) - - self.classifier.train() - return self - - - def predict(self, X, y=None): - X = 
check_array(X, dtype=np.int64, accept_sparse='csr') - - n_samples = X.shape[0] - pred = [] - - if sp.sparse.issparse(X): - if self.debug: print('Features are sparse, choosing faster predictions') - - for i in range(n_samples): - sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] - y_pred,_, distance = self.classifier.classify(sparse) - pred.append(np.int64(y_pred)) - - else: - for i in range(n_samples): - y_pred,_, distance = self.classifier.classify(list(X[i].toarray()[0])) - pred.append(np.int64(y_pred)) - - return pred - - - def predict_proba(self, X, y=None): - X = check_array(X, dtype=np.float64, accept_sparse='csr') - - n_samples = X.shape[0] - - pred = [] - - if sp.sparse.issparse(X): - print('Features are sparse, choosing faster predictions') - - for i in range(n_samples): - sparse = ['({},{})'.format(i+1, c) for i,c in zip(X[i].indices, X[i].data)] - _,dist, distance = self.classifier.classify(sparse) - pred.append(np.int64(dist)) - - else: - for i in range(n_samples): - _,proba, distance = self.classifier.classify(list(X[i].toarray()[0])) - pred.append(np.float(proba)) - - return pred - From c6062ec237fab110a175054d4b4c1b74b341dff7 Mon Sep 17 00:00:00 2001 From: Duchadian Date: Tue, 15 May 2018 23:25:42 +0200 Subject: [PATCH 24/81] deleting superfluous code --- utils.py | 50 +++----------------------------------------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/utils.py b/utils.py index 8a2371a..8ad4576 100644 --- a/utils.py +++ b/utils.py @@ -1,52 +1,6 @@ -import pandas as pd -from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import Pipeline -from numpy import mean, std -from pprint import pprint - -class EstimatorSelectionHelper: - def __init__(self, models, params, pipe, refit=False, memory=None): - if not set(models.keys()).issubset(set(params.keys())): - missing_params = list(set(models.keys()) - set(params.keys())) - raise ValueError("Some estimators are missing parameters: %s" 
% missing_params) - self.models = models - self.params = params - self.pipe = pipe - self.model = None - self.refit = refit - self.memory = memory - self.keys = models.keys() - self.grid_searches = {} - - def fill_grid_searches(self, prev_searches): - self.grid_searches = prev_searches - - def fit(self, X, y, cv=10, n_jobs=1, verbose=1, scoring=None): - for key in self.keys: - if not key in self.grid_searches: - print("Running GridSearchCV for %s." % key) - model = self.models[key] - params = self.params[key] - - if self.pipe: - steps = list(x for x in self.pipe) - steps.append((key, model)) - self.model = Pipeline(steps, memory=self.memory) - - gs = GridSearchCV(self.model, params, cv=cv, - n_jobs=n_jobs if not key in ['KNN', 'TIMBL'] else 2, - verbose=verbose, scoring=scoring, refit=self.refit) - gs.fit(X,y) - self.grid_searches[key] = gs - -def join_params(param1, param2): - temp = param1.copy() - temp.update(param2) - return temp - from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils import check_X_y, check_array -from dev_timbl import TimblClassifier +from timbl import TimblClassifier import scipy as sp import numpy as np @@ -74,6 +28,8 @@ def _make_timbl_options(self, *options): -N max number of features -H turn hashing on/off + This function still has to be made, for now the appropriate arguments + can be passed in fit() """ pass From 6c6c787cfbeae822ff87f0c8e578012497d240be Mon Sep 17 00:00:00 2001 From: Duchadian Date: Fri, 18 May 2018 15:34:02 +0200 Subject: [PATCH 25/81] adjusted utils and added timbl cache --- timbl.py | 1 - utils.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/timbl.py b/timbl.py index 5280c95..07df83c 100644 --- a/timbl.py +++ b/timbl.py @@ -374,4 +374,3 @@ def _parsedistribution(self, instance, start=0, end =None): return dist - diff --git a/utils.py b/utils.py index 8ad4576..af2ee70 100644 --- a/utils.py +++ b/utils.py @@ -108,7 +108,7 @@ def predict_proba(self, X, y=None): instead of a 
probabilistic continuum such as classifiers that can give a probability estimation (e.g. Linear classifiers). For an explanation, see Fawcett (2005). - """" + """ return predict(X) From 9766ee8d653f0835b2ca39f6a6285a3a357fea4d Mon Sep 17 00:00:00 2001 From: Duchadian Date: Fri, 18 May 2018 15:43:40 +0200 Subject: [PATCH 26/81] updated README --- README.rst | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 74971a5..1cac069 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ through the C++ interface is also available to Python scripts. Being able to access the API from Python greatly facilitates prototyping TiMBL-based applications. -This is the 2013 release by Maarten van Gompel, building on the 2006 release by Sander Canisius. For those used to the old library, there is one backwards-incompatible change, adapt your scripts to use ``import timblapi`` instead of ``import timbl``, as the latter is now a higher-level interface. +This is the 2013 release by Maarten van Gompel, building on the 2006 release by Sander Canisius. For those used to the old library, there is one backwards-incompatible change, adapt your scripts to use ``import timblapi`` instead of ``import timbl``, as the latter is now a higher-level interface. License ======= @@ -42,7 +42,7 @@ timbl in their distribution's package repository. In the remainder of this section, it is assumed that ``$TIMBL_HEADERS`` points to the directory that contains ``timbl/TimblAPI.h``, and ``$TIMBL_LIBS`` the directory that has contains the Timbl libraries. Note that Timbl itself depends on additional -dependencies. +dependencies. The second prerequisite is Boost.Python, a library that facilitates writing Python extension modules in C++. 
Many Linux distributions come with prebuilt @@ -76,11 +76,11 @@ and can then be built and installed with the following command, use --timbl-include-dir=$TIMBL_HEADERS \ --timbl-library-dir=$TIMBL_LIBS \ install --prefix=/dir/to/install/in - + This is the verbose variant, if default locations are used then the following may suffice already:: - $ sudo python setup3.py install - + $ sudo python setup3.py install + The ``--prefix`` option to the install command denotes the directory in which the module is to be installed. If you have the appropriate system permissions, you can leave out this option. The module will then be installed in the Python system tree. Otherwise, make sure that the installation directory is in the module search path of your Python system. @@ -88,7 +88,7 @@ system. Usage ======= -python-timbl offers two interface to the timbl API. A low-level interface contained in the module ``timblapi``, which is very much like the C++ library, and a high-level object oriented interface in the ``timbl`` module, which offers a ``TimblClassifier`` class. +python-timbl offers two interface to the timbl API. A low-level interface contained in the module ``timblapi``, which is very much like the C++ library, and a high-level object oriented interface in the ``timbl`` module, which offers a ``TimblClassifier`` class. 
timbl.TimblClassifier: High-level interface ---------------------------------------------- @@ -107,18 +107,18 @@ Training instances can be added using the ``append(featurevector, classlabel)`` classifier.append( (1,0,0), 'financial') classifier.append( (0,1,0), 'furniture') classifier.append( (0,0,1), 'geographic') - + Subsequently, you invoke the actual training, note that at each step Timbl may output considerable details about what it is doing to standard error output:: classifier.train() - + The results of this training is an instance base file, which you can save to file so you can load it again later:: classifier.save() - - classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) - classifier.load() - + + classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) + classifier.load() + The main advantage of the Python library is the fact that you can classify instances on the fly as follows, just pass a feature vector and optionally also a class label to ``classify(featurevector, classlabel)``:: @@ -132,7 +132,7 @@ You can also create a test file and test it all at once:: classifier.addinstance("testfile", (1,0,0),'financial' ) #addinstance can be used to add instances to external files (use append() for training) classifier.addinstance("testfile", (0,1,0),'furniture' ) classifier.addinstance("testfile", (0,0,1),'geograpic' ) - classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture + classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture classifier.test("testfile") print "Accuracy: ", classifier.getAccuracy() @@ -152,7 +152,7 @@ exists for this ``classify`` method. If you do not set this option, everything will still work fine, but you won't benefit from actual concurrency due to Python's the Global Interpret Lock. 
- + timblapi: Low-level interface ------------------------------- @@ -199,4 +199,6 @@ manually call the ``initthreading()`` method. Three TiMBL API methods print information to a standard C++ output stream object (ShowBestNeighbors, ShowOptions, ShowSettings, ShowSettings). In the Python interface, these methods will only work with Python (stream) objects that have a fileno method returning a valid file descriptor. Alternatively, three new methods are provided (bestNeighbo(u)rs, options, settings); these methods return the same information as a Python string object. +**scikit-learn wrapper** +A wrapper for use in scikit-learn has been added. It was designed for use in scikit-learn Pipeline objects. The wrapper is not finished and has to date only been tested on sparse data. Note that TiMBL does not work well with large amounts of features. It is suggested to reduce the amount of features to a number below 100 to keep system performance reasonable. Use on servers with large amounts of memory and processing cores advised. 
From 2c43318373112919ca67d519ec3bd2d50c51ffdc Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 23 May 2018 23:06:41 +0200 Subject: [PATCH 27/81] syntax fix after merge --- timbl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/timbl.py b/timbl.py index 07df83c..2670e02 100644 --- a/timbl.py +++ b/timbl.py @@ -85,12 +85,12 @@ def __init__(self, fileprefix, timbloptions, format = "Tabbed", dist=True, encod self.instances = [] self.api = None self.debug = debug - self.sklearn = sklearn + self.sklearn = sklearn if sklearn: import scipy as sp self.flushfile = mktemp(prefix=self.fileprefix, dir=flushdir) - self.flushed = 0 + self.flushed = 0 else: if os.path.exists(self.fileprefix + ".train") and overwrite: self.flushed = 0 From e3c876711fa7f60648cfb1e4066c421a65faf524 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 15 Jan 2019 23:52:58 +0100 Subject: [PATCH 28/81] Added latest DOI badge --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1cac069..e61069d 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,9 @@ .. image:: http://applejack.science.ru.nl/lamabadge.php/python-timbl :target: http://applejack.science.ru.nl/languagemachines/ +.. image:: https://zenodo.org/badge/8136669.svg + :target: https://zenodo.org/badge/latestdoi/8136669 + ====================== README: python-timbl ====================== @@ -201,4 +204,4 @@ Three TiMBL API methods print information to a standard C++ output stream object **scikit-learn wrapper** -A wrapper for use in scikit-learn has been added. It was designed for use in scikit-learn Pipeline objects. The wrapper is not finished and has to date only been tested on sparse data. Note that TiMBL does not work well with large amounts of features. It is suggested to reduce the amount of features to a number below 100 to keep system performance reasonable. Use on servers with large amounts of memory and processing cores advised. 
+A wrapper for use in scikit-learn has been added. It was designed for use in scikit-learn Pipeline objects. The wrapper is not finished and has to date only been tested on sparse data. Note that TiMBL does not work well with large amounts of features. It is suggested to reduce the amount of features to a number below 100 to keep system performance reasonable. Use on servers with large amounts of memory and processing cores advised. From 0feaddd731e6ffd1a31ebf8871f1268c6da5ff8e Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Thu, 17 Oct 2019 16:33:35 +0200 Subject: [PATCH 29/81] added repo status badge --- README.rst | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 74971a5..c946a5c 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,10 @@ .. image:: http://applejack.science.ru.nl/lamabadge.php/python-timbl :target: http://applejack.science.ru.nl/languagemachines/ +.. image:: https://www.repostatus.org/badges/latest/active.svg + :alt: Project Status: Active – The project has reached a stable, usable state and is being actively developed. + :target: https://www.repostatus.org/#active + ====================== README: python-timbl ====================== @@ -15,7 +19,7 @@ through the C++ interface is also available to Python scripts. Being able to access the API from Python greatly facilitates prototyping TiMBL-based applications. -This is the 2013 release by Maarten van Gompel, building on the 2006 release by Sander Canisius. For those used to the old library, there is one backwards-incompatible change, adapt your scripts to use ``import timblapi`` instead of ``import timbl``, as the latter is now a higher-level interface. +This is the 2013 release by Maarten van Gompel, building on the 2006 release by Sander Canisius. 
For those used to the old library, there is one backwards-incompatible change, adapt your scripts to use ``import timblapi`` instead of ``import timbl``, as the latter is now a higher-level interface. License ======= @@ -42,7 +46,7 @@ timbl in their distribution's package repository. In the remainder of this section, it is assumed that ``$TIMBL_HEADERS`` points to the directory that contains ``timbl/TimblAPI.h``, and ``$TIMBL_LIBS`` the directory that has contains the Timbl libraries. Note that Timbl itself depends on additional -dependencies. +dependencies. The second prerequisite is Boost.Python, a library that facilitates writing Python extension modules in C++. Many Linux distributions come with prebuilt @@ -76,11 +80,11 @@ and can then be built and installed with the following command, use --timbl-include-dir=$TIMBL_HEADERS \ --timbl-library-dir=$TIMBL_LIBS \ install --prefix=/dir/to/install/in - + This is the verbose variant, if default locations are used then the following may suffice already:: - $ sudo python setup3.py install - + $ sudo python setup3.py install + The ``--prefix`` option to the install command denotes the directory in which the module is to be installed. If you have the appropriate system permissions, you can leave out this option. The module will then be installed in the Python system tree. Otherwise, make sure that the installation directory is in the module search path of your Python system. @@ -88,7 +92,7 @@ system. Usage ======= -python-timbl offers two interface to the timbl API. A low-level interface contained in the module ``timblapi``, which is very much like the C++ library, and a high-level object oriented interface in the ``timbl`` module, which offers a ``TimblClassifier`` class. +python-timbl offers two interface to the timbl API. 
A low-level interface contained in the module ``timblapi``, which is very much like the C++ library, and a high-level object oriented interface in the ``timbl`` module, which offers a ``TimblClassifier`` class. timbl.TimblClassifier: High-level interface ---------------------------------------------- @@ -107,18 +111,18 @@ Training instances can be added using the ``append(featurevector, classlabel)`` classifier.append( (1,0,0), 'financial') classifier.append( (0,1,0), 'furniture') classifier.append( (0,0,1), 'geographic') - + Subsequently, you invoke the actual training, note that at each step Timbl may output considerable details about what it is doing to standard error output:: classifier.train() - + The results of this training is an instance base file, which you can save to file so you can load it again later:: classifier.save() - - classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) - classifier.load() - + + classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) + classifier.load() + The main advantage of the Python library is the fact that you can classify instances on the fly as follows, just pass a feature vector and optionally also a class label to ``classify(featurevector, classlabel)``:: @@ -132,7 +136,7 @@ You can also create a test file and test it all at once:: classifier.addinstance("testfile", (1,0,0),'financial' ) #addinstance can be used to add instances to external files (use append() for training) classifier.addinstance("testfile", (0,1,0),'furniture' ) classifier.addinstance("testfile", (0,0,1),'geograpic' ) - classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture + classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture classifier.test("testfile") print "Accuracy: ", classifier.getAccuracy() @@ -152,7 +156,7 @@ exists for this ``classify`` method. 
If you do not set this option, everything will still work fine, but you won't benefit from actual concurrency due to Python's the Global Interpret Lock. - + timblapi: Low-level interface ------------------------------- From 8108b85860cbe10953b52d71080cbaa5d07deaf0 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 9 Dec 2019 00:20:06 +0100 Subject: [PATCH 30/81] add lib64 to library search path --- setup3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup3.py b/setup3.py index a38ab89..7b775a4 100755 --- a/setup3.py +++ b/setup3.py @@ -49,7 +49,7 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib'] + libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib', '/usr/local/lib64'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') From 629fd8fe1a3243d4a6da9be2660fb4750ebc135b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 9 Dec 2019 00:22:22 +0100 Subject: [PATCH 31/81] same for python2 and version bump --- setup2.py | 4 ++-- setup3.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup2.py b/setup2.py index 4e4e8d1..4b4e80d 100755 --- a/setup2.py +++ b/setup2.py @@ -52,7 +52,7 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib'] + libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + 
platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib', '/usr/local/lib64'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') @@ -193,7 +193,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2018.04.23", + version="2019.12.09", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 7b775a4..3ed88f7 100755 --- a/setup3.py +++ b/setup3.py @@ -194,7 +194,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2018.04.23", + version="2019.12.09", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From a96d1bdf35f56f3691095f4c860d0768d26b53ae Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 15 Apr 2020 17:01:43 +0200 Subject: [PATCH 32/81] changing fallback default for boost_python3 (debug attempt, not expecting much) --- setup3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup3.py b/setup3.py index 3ed88f7..d703ba3 100755 --- a/setup3.py +++ b/setup3.py @@ -94,7 +94,7 @@ def initialize_options(self): def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None self.boost_include_dir = None - self.boostlib = "boost_python" + self.boostlib = "boost_python3" if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew libsearch.insert(0,'/usr/local/opt/boost-python3/lib') From 27b5917cd965ec67ec917a11d998889a8a1bb322 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 19 May 2020 15:49:15 +0200 Subject: [PATCH 33/81] fixing wrong distribution computation #4 --- setup3.py | 2 +- 
src/timblapi.cc | 26 ++++++++------------------ 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/setup3.py b/setup3.py index d703ba3..3ed88f7 100755 --- a/setup3.py +++ b/setup3.py @@ -94,7 +94,7 @@ def initialize_options(self): def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None self.boost_include_dir = None - self.boostlib = "boost_python3" + self.boostlib = "boost_python" if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew libsearch.insert(0,'/usr/local/opt/boost-python3/lib') diff --git a/src/timblapi.cc b/src/timblapi.cc index 65fb509..190906e 100755 --- a/src/timblapi.cc +++ b/src/timblapi.cc @@ -241,31 +241,21 @@ bool TimblApiWrapper::showSettings(object& stream) python::dict TimblApiWrapper::dist2dict(const Timbl::ValueDistribution * distribution, bool normalize, double minf) const { python::dict result; - size_t freq; - - double maxfreq = 0; - + double freq; + double sum = 0.0; if (normalize) { - Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); - while ( it != distribution->end() ){ - Timbl::Vfield *f = it->second; - if (f->Freq() > maxfreq) maxfreq = f->Freq(); - ++it; + for (Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { + sum += it->second->Weight(); } } - - Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); - while ( it != distribution->end() ){ - Timbl::Vfield *f = it->second; + for (Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { if (normalize) { - freq = f->Freq() / maxfreq; - } else { - freq = f->Freq(); + it->second->SetWeight(it->second->Weight() / sum); } + freq = it->second->Weight(); if ( freq >= minf ){ - result[f->Value()->Name()] = freq; + result[it->second->Value()->Name()] = freq; } - ++it; } return result; From 872944d6564ddd612b705efa1fed64a31e9905b6 Mon Sep 17 00:00:00 2001 From: 
Maarten van Gompel Date: Tue, 19 May 2020 20:26:39 +0200 Subject: [PATCH 34/81] minor formatting --- example.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/example.py b/example.py index edb9a0b..8dbf33c 100755 --- a/example.py +++ b/example.py @@ -7,13 +7,13 @@ import timbl import os -#We are building a very simple context-aware translator Word Sense Disambiguator for the word "bank", based on the occurrence of some keywords in the same sentence: +#We are building a very simple context-aware translator Word Sense Disambiguator for the word "bank", based on the occurrence of some keywords in the same sentence: -# The features are binary and represent presence or absence of certain keywords. We choose: +# The features are binary and represent presence or absence of certain keywords. We choose: # - money # - sit # - river -#They have a value of 0 or 1 (but note that Timbl support string features just as well!) +#They have a value of 0 or 1 (but note that Timbl support string features just as well!) 
#The classes we predict are: # - financial @@ -35,7 +35,7 @@ #We start anew and load the classifier again (of course we could have just skipped this and the save step and continued immediately) classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) #wsd-bank will be the prefix of any files written for timbl -classifier.load() #even if this is omitted it will still work, the first classify() call will invoke load() +classifier.load() #even if this is omitted it will still work, the first classify() call will invoke load() #Let's classify an instance: classlabel, distribution, distance = classifier.classify( (1,0,0) ) @@ -61,7 +61,7 @@ classifier.addinstance("testfile", (1,0,0),'financial' ) #addinstance can be used to add instances to external files (use append() for training) classifier.addinstance("testfile", (0,1,0),'furniture' ) classifier.addinstance("testfile", (0,0,1),'geograpic' ) -classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture +classifier.addinstance("testfile", (1,1,0),'geograpic' ) #this one will be wrongly classified as financial & furniture classifier.test("testfile") From b65d2814d093c52ff942532cb8abb3a57b0b4ec9 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 19 May 2020 20:28:16 +0200 Subject: [PATCH 35/81] version bump --- setup2.py | 2 +- setup3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup2.py b/setup2.py index 4b4e80d..a2cf493 100755 --- a/setup2.py +++ b/setup2.py @@ -193,7 +193,7 @@ def build_extensions(self): setup( name="python-timbl", - version="2019.12.09", + version="2020.05.19", description="Python 2 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/setup3.py b/setup3.py index 3ed88f7..85af12e 100755 --- a/setup3.py +++ b/setup3.py @@ -194,7 +194,7 @@ def build_extensions(self): setup( 
name="python3-timbl", - version="2019.12.09", + version="2020.05.19", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From ca80a3e267ce88e9ad7422afb73b281e8037236b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 19 May 2020 20:29:59 +0200 Subject: [PATCH 36/81] Dropping python 2 support --- README.rst | 7 +- example.py | 3 - setup3.py => setup.py | 4 +- setup2.py | 205 ------------------------------------------ 4 files changed, 5 insertions(+), 214 deletions(-) rename setup3.py => setup.py (98%) delete mode 100755 setup2.py diff --git a/README.rst b/README.rst index 9e9ff1c..b4d9a99 100644 --- a/README.rst +++ b/README.rst @@ -24,6 +24,8 @@ TiMBL-based applications. This is the 2013 release by Maarten van Gompel, building on the 2006 release by Sander Canisius. For those used to the old library, there is one backwards-incompatible change, adapt your scripts to use ``import timblapi`` instead of ``import timbl``, as the latter is now a higher-level interface. +Since 2020, this only supports Python 3, Python 2 support has been deprecated. + License ======= @@ -74,10 +76,9 @@ obtained through github:: $ git clone git://github.com/proycon/python-timbl.git $ cd python-timbl -and can then be built and installed with the following command, use -``setup2.py`` for Python 2 and ``setup3.py`` for Python 3:: +and can then be built and installed with the following command:: - $ sudo python setup3.py \ + $ sudo python3 setup.py \ build_ext --boost-include-dir=$BOOST_HEADERS \ --boost-library-dir=$BOOST_LIBS \ --timbl-include-dir=$TIMBL_HEADERS \ diff --git a/example.py b/example.py index 8dbf33c..cb3edaf 100755 --- a/example.py +++ b/example.py @@ -1,9 +1,6 @@ #! 
/usr/bin/env python # -*- coding: utf8 -*- - -from __future__ import print_function, unicode_literals, division, absolute_import #Make Python 2.x act as much like Python 3 as possible - import timbl import os diff --git a/setup3.py b/setup.py similarity index 98% rename from setup3.py rename to setup.py index 85af12e..18e8bc0 100755 --- a/setup3.py +++ b/setup.py @@ -1,12 +1,10 @@ -#!/usr/bin/python3 +#!/bin/env python3 import sys import os import shutil import platform import glob -if os.path.exists('setup3.py'): - shutil.copyfile("setup3.py","setup.py") from distutils.core import setup, Extension from distutils.command.build_ext import build_ext diff --git a/setup2.py b/setup2.py deleted file mode 100755 index a2cf493..0000000 --- a/setup2.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/python3 - -import sys -import os -import shutil -import platform -import glob -if os.path.exists('setup2.py'): - shutil.copyfile("setup2.py","setup.py") - -from itertools import ifilter - -from distutils.core import setup, Extension -from distutils.command.build_ext import build_ext -from distutils.dep_util import newer -from distutils.unixccompiler import UnixCCompiler - - -def updateDocHeader(input, output): - docstrings = {} - execfile(input, docstrings) - - stream = open(output, "w") - print >> stream, "#ifndef TIMBL_DOC_H" - print >> stream, "#define TIMBL_DOC_H\n" - print >> stream, "#include \n" - - for var in ifilter(lambda v: v.endswith("_DOC"), docstrings): - print >> stream, "PyDoc_STRVAR(%s, \"%s\");\n" % ( - var, docstrings[var].strip().encode("string_escape")) - - print >> stream, "#endif" - - stream.close() - - -class BuildExt(build_ext): - - user_options = build_ext.user_options + [ - ("boost-include-dir=", None, "directory for boost header files"), - ("boost-library-dir=", None, "directory for boost library files"), - ("timbl-include-dir=", None, "directory for TiMBL files"), - ("timbl-library-dir=", None, "directory for TiMBL library files"), - 
("libxml2-include-dir=", None, "directory for LibXML2 files"), - ("libxml2-library-dir=", None, "directory for LibXML2 library files"), - ("static-boost-python3", "s", "statically link boost-python")] - - boolean_options = build_ext.boolean_options + [ - "static-boost-python3"] - - - def initialize_options(self): - build_ext.initialize_options(self) - pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib', '/usr/local/lib64'] - includesearch = ['/usr/include', '/usr/local/include'] - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): - libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include'): - includesearch.insert(0, os.environ['VIRTUAL_ENV'] + '/include') - - #Find boost - self.findboost(libsearch, includesearch, pyversion) - - #Find libxml2 - if os.path.exists('/usr/local/Cellar/libxml2'): - #Mac OS X with homebrew - versiondirs = [] - for d in glob.glob('/usr/local/Cellar/libxml2/*'): - if os.path.isdir(d) and d[0] != '.': - versiondirs.append(os.path.basename(d)) - if versiondirs: - versiondirs.sort() - version = versiondirs[0] - libsearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/lib') - includesearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/include') - - for d in includesearch: - if os.path.exists(d + '/libxml2'): - self.libxml2_include_dir = d + '/libxml2' - self.libxml2_library_dir = d.replace('include','lib') - break - - #Find timbl - self.timbl_library_dir = None - for d in includesearch: - if os.path.exists(d + '/timbl'): - self.timbl_include_dir = d - self.timbl_library_dir = d.replace('include','lib') - break - - if self.timbl_library_dir is None: - raise Exception("Timbl not found, make sure to install Timbl and set --timbl-include-dir and 
--timbl-library-dir appropriately...") - - self.static_boost_python = False - - def findboost(self, libsearch, includesearch, pyversion): - self.boost_library_dir = None - self.boost_include_dir = None - self.boostlib = "boost_python" - if os.path.exists('/usr/local/opt/boost-python'): - #Mac OS X with homebrew - libsearch.insert(0,'/usr/local/opt/boost-python/lib') - libsearch.insert(0,'/usr/local/opt/boost/lib') - includesearch.insert(0,'/usr/local/opt/boost/include') - - for d in libsearch: - if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): - self.boost_library_dir = d - self.boostlib = "boost_python-py" + pyversion - break - elif os.path.exists(d + "/libboost_python2.so"): - self.boost_library_dir = d - self.boostlib = "boost_python2" - break - elif os.path.exists(d + "/libboost_python.so"): - #probably goes wrong if this is for python 3! - self.boost_library_dir = d - self.boostlib = "boost_python" - break - elif os.path.exists(d + "/libboost_python" + pyversion + ".dylib"): #Mac OS X - self.boost_library_dir = d - self.boostlib = "boost_python" + pyversion - break - elif os.path.exists(d + "/libboost_python.dylib"): #Mac OS X - self.boost_library_dir = d - #probably goes wrong if this is for python 2! - self.boostlib = "boost_python" - break - for d in includesearch: - if os.path.exists(d + "/boost"): - self.boost_include_dir = d - - if self.boost_library_dir is not None: - print >>sys.stderr, "Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")" - else: - print >>sys.stderr, "Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?" - self.boost_library_dir = libsearch[0] - if self.boost_include_dir: - print >>sys.stderr, "Detected boost headers in " + self.boost_include_dir - else: - print >>sys.stderr, "Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-include-dir" - self.boost_include_dir = includesearch[0] - - def finalize_options(self): - build_ext.finalize_options(self) - self.ensure_file_exists("boost_include_dir", "boost/python.hpp") - self.ensure_dirname("boost_library_dir") - self.ensure_file_exists("timbl_include_dir", "timbl/TimblAPI.h") - self.ensure_dirname("timbl_library_dir") - self.ensure_file_exists("libxml2_include_dir", "libxml/tree.h") - self.ensure_dirname("libxml2_library_dir") - - def ensure_file_exists(self, option, filename): - self.ensure_dirname(option) - self._ensure_tested_string( - option, - lambda d: os.path.isfile(os.path.join(d, filename)), - "directory name", - "'%s' was not found in '%%s'" % filename) - - def build_extensions(self): - if newer("src/docstrings.h.in", "src/docstrings.h"): - updateDocHeader("src/docstrings.h.in", "src/docstrings.h") - - for ext in self.extensions: - ext.include_dirs.append(self.boost_include_dir) - ext.include_dirs.append(self.timbl_include_dir) - ext.include_dirs.append(self.libxml2_include_dir) - ext.library_dirs.append(self.timbl_library_dir) - ext.library_dirs.append(self.boost_library_dir) - ext.library_dirs.append(self.libxml2_library_dir) - - - compile_args = ["-std=c++11"] - if platform.system() == "Darwin": - compile_args.append("-stdlib=libc++") - ext.extra_compile_args.extend(compile_args) - if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: - ext.extra_link_args.extend( - "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) - else: - ext.libraries.append(self.boostlib) - - - build_ext.build_extensions(self) - - -timblModule = Extension("timblapi", ["src/timblapi.cc"], - libraries=["timbl"], - depends=["src/timblapi.h", "src/docstrings.h"]) - - -setup( - name="python-timbl", - version="2020.05.19", - description="Python 2 language binding for the Tilburg Memory-Based Learner", - author="Sander Canisius, Maarten van Gompel", - author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", - 
url="http://github.com/proycon/python-timbl", - classifiers=["Development Status :: 4 - Beta","Topic :: Text Processing :: Linguistic","Topic :: Scientific/Engineering","Programming Language :: Python :: 3","Operating System :: POSIX","Intended Audience :: Developers","Intended Audience :: Science/Research","License :: OSI Approved :: GNU General Public License v3 (GPLv3)"], - license="GPL", - py_modules=['timbl'], - ext_modules=[timblModule], - cmdclass={"build_ext": BuildExt}) From d5d1068394f3c126a1a6c3963ca22a4bcfe4d1e0 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 19 May 2020 20:39:13 +0200 Subject: [PATCH 37/81] cleanup after python 2 deprecation --- timbl.py | 88 ++++++++++++++------------------------------------------ 1 file changed, 22 insertions(+), 66 deletions(-) diff --git a/timbl.py b/timbl.py index 2670e02..604b377 100644 --- a/timbl.py +++ b/timbl.py @@ -13,50 +13,28 @@ from __future__ import absolute_import import sys -if sys.version < '3': - from codecs import getwriter - stderr = getwriter('utf-8')(sys.stderr) - stdout = getwriter('utf-8')(sys.stdout) -else: - stderr = sys.stderr - stdout = sys.stdout - from tempfile import mktemp import timblapi import io import os +stderr = sys.stderr +stdout = sys.stdout + + class LoadException(Exception): pass class ClassifyException(Exception): pass -def b(s): - """Conversion to bytes""" - if sys.version < '3': - if isinstance(s, unicode): #pylint: disable=undefined-variable - return s.encode('utf-8') - else: - return s - #else: - # if isinstance(s, str): - # return s.encode('utf-8') def u(s, encoding = 'utf-8', errors='strict'): - #ensure s is properly unicode.. 
wrapper for python 2.6/2.7, - if sys.version < '3': - #ensure the object is unicode - if isinstance(s, unicode): #pylint: disable=undefined-variable - return s - else: - return unicode(s, encoding,errors=errors) #pylint: disable=undefined-variable + #will work on byte arrays + if isinstance(s, str): + return s else: - #will work on byte arrays - if isinstance(s, str): - return s - else: - return str(s,encoding,errors=errors) + return str(s,encoding,errors=errors) class TimblClassifier(object): @@ -167,16 +145,13 @@ def train(self, save=False): if self.dist: options += " +v+db +v+di" print("Calling Timbl API for training: " + options, file=stderr) - if sys.version < '3': - self.api = timblapi.TimblAPI(b(options), b"") - else: - self.api = timblapi.TimblAPI(options,"") + self.api = timblapi.TimblAPI(options,"") if self.debug: print("Enabling debug for timblapi",file=stderr) self.api.enableDebug() trainfile = filepath - self.api.learn(b(trainfile)) + self.api.learn(trainfile) if save: self.save() if self.threading: @@ -185,8 +160,8 @@ def train(self, save=False): def save(self): if not self.api: raise Exception("No API instantiated, did you train the classifier first?") - self.api.writeInstanceBase(b(self.fileprefix + ".ibase")) - self.api.saveWeights(b(self.fileprefix + ".wgt")) + self.api.writeInstanceBase(self.fileprefix + ".ibase") + self.api.saveWeights(self.fileprefix + ".wgt") def classify(self, features, allowtopdistribution=True): @@ -198,9 +173,9 @@ def classify(self, features, allowtopdistribution=True): testinstance = self.delimiter.join(features) + (self.delimiter if not self.delimiter == '' else ' ') + "?" 
if self.dist: if self.threading: - result, cls, distribution, distance = self.api.classify3safe(b(testinstance), self.normalize, int(not allowtopdistribution)) + result, cls, distribution, distance = self.api.classify3safe(testinstance, self.normalize, int(not allowtopdistribution)) else: - result, cls, distribution, distance = self.api.classify3(b(testinstance), self.normalize, int(not allowtopdistribution)) + result, cls, distribution, distance = self.api.classify3(testinstance, self.normalize, int(not allowtopdistribution)) if result: cls = u(cls) return (cls, distribution, distance) @@ -229,15 +204,12 @@ def load(self): raise LoadException("Instance base '"+self.fileprefix+".ibase' not found, did you train and save the classifier first?") options = "-F " + self.format + " " + self.timbloptions - if sys.version < '3': - self.api = timblapi.TimblAPI(b(options), b"") - else: - self.api = timblapi.TimblAPI(options, "") + self.api = timblapi.TimblAPI(options, "") if self.debug: print("Enabling debug for timblapi",file=stderr) self.api.enableDebug() print("Calling Timbl API : " + options,file=stderr) - self.api.getInstanceBase(b(self.fileprefix + '.ibase')) + self.api.getInstanceBase(self.fileprefix + '.ibase') #if os.path.exists(self.fileprefix + ".wgt"): # self.api.getWeights(self.fileprefix + '.wgt') if self.threading: @@ -261,10 +233,7 @@ def test(self, testfile): """Test on an existing testfile and return the accuracy""" if not self.api: self.load() - if sys.version < '3': - self.api.test(b(testfile), b(self.fileprefix + '.out'),b'') - else: - self.api.test(u(testfile), u(self.fileprefix + '.out'),'') + self.api.test(u(testfile), u(self.fileprefix + '.out'),'') return self.api.getAccuracy() @@ -272,18 +241,12 @@ def crossvalidate(self, foldsfile): """Train & Test using cross validation, testfile is a file that contains the filenames of all the folds!""" options = "-F " + self.format + " " + self.timbloptions + " -t cross_validate" print("Instantiating Timbl API 
: " + options,file=stderr) - if sys.version < '3': - self.api = timblapi.TimblAPI(b(options), b"") - else: - self.api = timblapi.TimblAPI(options, "") + self.api = timblapi.TimblAPI(options, "") if self.debug: print("Enabling debug for timblapi",file=stderr) self.api.enableDebug() print("Calling Timbl Test : " + options,file=stderr) - if sys.version < '3': - self.api.test(b(foldsfile),b'',b'') - else: - self.api.test(u(foldsfile),'','') + self.api.test(u(foldsfile),'','') a = self.api.getAccuracy() del self.api return a @@ -294,20 +257,13 @@ def leaveoneout(self): """Train & Test using leave one out""" traintestfile = self.fileprefix + '.train' options = "-F " + self.format + " " + self.timbloptions + " -t leave_one_out" - if sys.version < '3': - self.api = timblapi.TimblAPI(b(options), b"") - else: - self.api = timblapi.TimblAPI(options, "") + self.api = timblapi.TimblAPI(options, "") if self.debug: print("Enabling debug for timblapi",file=stderr) self.api.enableDebug() print("Calling Timbl API : " + options,file=stderr) - if sys.version < '3': - self.api.learn(b(traintestfile)) - self.api.test(b(traintestfile), b(self.fileprefix + '.out'),b'') - else: - self.api.learn(u(traintestfile)) - self.api.test(u(traintestfile), u(self.fileprefix + '.out'),'') + self.api.learn(u(traintestfile)) + self.api.test(u(traintestfile), u(self.fileprefix + '.out'),'') return self.api.getAccuracy() def readtestoutput(self): From acdb8d57c6c7a444a74ab28fd00ed499879b6201 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 8 Jun 2020 22:37:07 +0200 Subject: [PATCH 38/81] fixing finding boost problems... again.. 
--- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 18e8bc0..c9dd799 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 - libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/local/lib', '/usr/local/lib64'] + libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/lib/x86_64-linux-gnu/', '/usr/local/lib', '/usr/local/lib64'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') @@ -104,6 +104,10 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = d self.boostlib = "boost_python-py" + pyversion break + elif os.path.exists(d + "/libboost_python"+pyversion+".so"): + self.boost_library_dir = d + self.boostlib = "boost_python" + pyversion + break elif os.path.exists(d + "/libboost_python3.so"): self.boost_library_dir = d self.boostlib = "boost_python3" @@ -192,7 +196,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2020.05.19", + version="2020.06.08", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 7ae3e7a8e3cc52e1b7144787e86ab735459620d7 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:02:08 +0100 Subject: [PATCH 39/81] updated for newer timbl API --- setup.py | 4 ++-- src/timblapi.cc | 14 ++++++++------ src/timblapi.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index c9dd799..58316db 100755 --- 
a/setup.py +++ b/setup.py @@ -175,7 +175,7 @@ def build_extensions(self): ext.library_dirs.append(self.boost_library_dir) ext.library_dirs.append(self.libxml2_library_dir) - compile_args = ["-std=c++11"] + compile_args = ["-std=c++17"] if platform.system() == "Darwin": compile_args.append("-stdlib=libc++") ext.extra_compile_args.extend(compile_args) @@ -196,7 +196,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2020.06.08", + version="2024.10.28", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/src/timblapi.cc b/src/timblapi.cc index 190906e..b3a0f04 100755 --- a/src/timblapi.cc +++ b/src/timblapi.cc @@ -49,6 +49,7 @@ #include "timbl/Instance.h" #include "docstrings.h" +#include #include #include @@ -87,7 +88,7 @@ tuple TimblApiWrapper::classify3(const std::string& line, bool normalize, const { std::string cls; double distance; - const Timbl::ValueDistribution * distrib; + const Timbl::ClassDistribution * distrib; const Timbl::TargetValue * result = Classify(line, distrib , distance); if (result != NULL) { if ((requireddepth > 0) && (matchDepth() < requireddepth)) { @@ -141,9 +142,10 @@ tuple TimblApiWrapper::classify3safe(const std::string& line, bool normalize,con Timbl::TimblExperiment * clonedexp = getexperimentforthread(); - const Timbl::ValueDistribution * distrib; + const Timbl::ClassDistribution * distrib; double distance; - const Timbl::TargetValue * result = clonedexp->Classify(line, distrib,distance); + const auto line_unicode = TiCC::toUnicodeString(line); + const Timbl::TargetValue * result = clonedexp->Classify(line_unicode, distrib,distance); if (result != NULL) { if ((requireddepth > 0) && (clonedexp->matchDepth() < requireddepth)) { PyEval_RestoreThread(m_thread_state); @@ -238,17 +240,17 @@ bool TimblApiWrapper::showSettings(object& stream) } -python::dict 
TimblApiWrapper::dist2dict(const Timbl::ValueDistribution * distribution, bool normalize, double minf) const { +python::dict TimblApiWrapper::dist2dict(const Timbl::ClassDistribution * distribution, bool normalize, double minf) const { python::dict result; double freq; double sum = 0.0; if (normalize) { - for (Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { + for (Timbl::ClassDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { sum += it->second->Weight(); } } - for (Timbl::ValueDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { + for (Timbl::ClassDistribution::VDlist::const_iterator it = distribution->begin(); it != distribution->end(); it++) { if (normalize) { it->second->SetWeight(it->second->Weight() / sum); } diff --git a/src/timblapi.h b/src/timblapi.h index 6390a9b..6a28b4f 100644 --- a/src/timblapi.h +++ b/src/timblapi.h @@ -64,7 +64,7 @@ class TimblApiWrapper : public Timbl::TimblAPI { private: std::vector > experimentpool; Timbl::TimblExperiment * detachedexp; - python::dict dist2dict(const Timbl::ValueDistribution * dist, bool=true,double=0) const; + python::dict dist2dict(const Timbl::ClassDistribution * dist, bool=true,double=0) const; pthread_mutex_t lock; //global lock bool debug; int runningthreads; From 3680155e14486f3a57b3f0c851d9c4b9186bf1db Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:14:23 +0100 Subject: [PATCH 40/81] detect python >= 3.10 --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 58316db..183507a 100755 --- a/setup.py +++ b/setup.py @@ -46,7 +46,8 @@ class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) - pyversion = sys.version[0:3][0] + sys.version[0:3][2] #returns something like 32 + pyversion = sys.version.split(" ")[0] + pyversion = 
pyversion.split(".")[0] + pyversion.split(".")[1] #returns something like 312 for 3.12 libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/lib/x86_64-linux-gnu/', '/usr/local/lib', '/usr/local/lib64'] includesearch = ['/usr/include', '/usr/local/include'] if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): From a49c0146e6934c2a8289171bc7b08309028d5a10 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:17:05 +0100 Subject: [PATCH 41/81] added build-deps.sh to build dependencies --- build-deps.sh | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100755 build-deps.sh diff --git a/build-deps.sh b/build-deps.sh new file mode 100755 index 0000000..1bfd377 --- /dev/null +++ b/build-deps.sh @@ -0,0 +1,81 @@ +#!/bin/sh + +# Builds dependencies (latest stable releases) from source +# Used for building wheels. Invoke via 'make wheels' rather +# than directly! + +set -e + +. /etc/os-release + +get_latest_version() { + #Finds the latest git tag or falls back to returning the git default branch (usually master or main) + #Assumes some kind of semantic versioning (possibly with a v prefix) + TAG=$(git tag -l | grep -E "^v?[0-9]+(\.[0-9])*" | sort -t. -k 1.2,1n -k 2,2n -k 3,3n -k 4,4n | tail -n 1) + if [ -z "$TAG" ]; then + echo "No releases found, falling back to default git branch!">&2 + #output the git default branch for the repository in the current working dir (usually master or main) + git symbolic-ref refs/remotes/origin/HEAD | sed 's@^refs/remotes/origin/@@' + else + echo "$TAG" + fi +} + +[ -z "$PREFIX" ] && PREFIX="/usr/local/" +if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then + if [ -d /usr/local/share/aclocal ]; then + #needed for manylinux_2_28 container which ships custom autoconf, possibly others too? 
+ export ACLOCAL_PATH=/usr/share/aclocal + fi + if [ "$VERSION_ID" = "7" ]; then + yum install -y libexttextcat-devel + if [ -d /opt/rh/devtoolset-10/root/usr/lib ]; then + #we are running in the manylinux2014 image + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/rh/devtoolset-10/root/usr/lib + #libxml2 is out of date, compile and install a new one + yum install -y xz + wget https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.tar.xz + unxz libxml2-2.9.14.tar.xz + tar -xf libxml2-2.9.14.tar + cd libxml2-2.9.14 && ./configure --prefix=$PREFIX --without-python && make && make install + cd .. + fi + elif [ "$VERSION_ID" = "8" ]; then + #they forgot to package libexttextcat-devel? grab one manually: + wget https://github.com/proycon/LaMachine/raw/master/deps/centos8/libexttextcat-devel-3.4.5-2.el8.x86_64.rpm + yum install -y libexttextcat-devel-3.4.5-2.el8.x86_64.rpm + fi +fi + +PWD="$(pwd)" +BUILDDIR="$(mktemp -dt "build-deps.XXXXXX")" +cd "$BUILDDIR" +for PACKAGE in LanguageMachines/ticcutils LanguageMachines/timbl; do + echo "Git cloning $PACKAGE ">&2 + git clone https://github.com/$PACKAGE + PACKAGE="$(basename $PACKAGE)" + cd "$PACKAGE" + if [ "$1" != "--devel" ]; then + VERSION="$(get_latest_version)" + if [ "$VERSION" != "master" ] && [ "$VERSION" != "main" ] && [ "$VERSION" != "devel" ]; then + echo "Checking out latest stable version: $VERSION">&2 + git -c advice.detachedHead=false checkout "$VERSION" + fi + fi + echo "Bootstrapping $PACKAGE ">&2 + if [ ! -f configure ] && [ -f configure.ac ]; then + #shellcheck disable=SC2086 + autoreconf --install --verbose + fi + echo "Configuring $PACKAGE" >&2 + ./configure --prefix="$PREFIX" >&2 + echo "Make $PACKAGE" >&2 + make + echo "Make install $PACKAGE" >&2 + make install + cd .. 
+done +cd $PWD +[ -n "$BUILDDIR" ] && rm -Rf "$BUILDDIR" + +echo "Dependencies installed" >&2 From d069419076f0ab9c916495b50e70c32484bfdb3d Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:21:37 +0100 Subject: [PATCH 42/81] ci: added wheel building --- .github/workflows/wheels.yml | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 .github/workflows/wheels.yml diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000..8ec87c3 --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,70 @@ +name: Build Wheels + +on: [workflow_dispatch] + +jobs: + build_wheels: + name: Build wheels on for ${{matrix.python.cp}}-${{ matrix.buildplat.sys }} + runs-on: ${{ matrix.buildplat.runs_on }} + strategy: + matrix: + buildplat: + - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } + - { runs_on: macos-12, sys: macosx, arch: x86_64, benv: "12.0" } + - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } + python: + - { cp: "cp38", rel: "3.8" } + - { cp: "cp39", rel: "3.9" } + - { cp: "cp310", rel: "3.10" } + - { cp: "cp311", rel: "3.11" } + - { cp: "cp312", rel: "3.12" } + - { cp: "cp313", rel: "3.13" } + + steps: + - uses: actions/checkout@v4.1.1 + + # Used to host cibuildwheel + - uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install cibuildwheel + run: python -m pip install cibuildwheel + + - name: Build wheels (Linux) + if: ${{ runner.os != 'macOS' }} + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* + CIBW_ARCHS_LINUX: "x86_64" + CIBW_BEFORE_ALL_LINUX: > + if command -v apt-get; then + apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make + elif command -v yum; then + yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel 
bzip2-devel libtool autoconf-archive autoconf automake m4 wget + #on CentOS 7 we also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise + elif command -v apk; then + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && + rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ + fi && + ./build-deps.sh + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2014_x86_64 + CIBW_SKIP: "*-win* *-manylinux_i686 pp*" + + - name: Build wheels (macOS) + if: ${{ runner.os == 'macOS' && runner.python != "3.8" }} + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* + CIBW_ARCHS: ${{ matrix.buildplat.arch }} + CIBW_ENVIRONMENT: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" + CIBW_BEFORE_ALL_MACOS: > + brew install boost + brew tap fbkarsdorp/homebrew-lamachine && + brew install timbl + + - uses: actions/upload-artifact@v4 + if: ${{ ! 
(runner.os == 'macOS' && runner.python == "3.8") }} + with: + name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} + path: ./wheelhouse/*.whl From 9e91263b386eb3f76971c2f356dcd40317c4e2db Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:28:03 +0100 Subject: [PATCH 43/81] ci: syntax fix --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8ec87c3..b42adae 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -52,7 +52,7 @@ jobs: CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (macOS) - if: ${{ runner.os == 'macOS' && runner.python != "3.8" }} + if: ${{ runner.os == 'macOS' && runner.python != '3.8' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -64,7 +64,7 @@ jobs: brew install timbl - uses: actions/upload-artifact@v4 - if: ${{ ! (runner.os == 'macOS' && runner.python == "3.8") }} + if: ${{ ! 
(runner.os == 'macOS' && runner.python == '3.8') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From 23b583e5aa20955f16401ad0ce50ab8f7a350e52 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:30:31 +0100 Subject: [PATCH 44/81] ci: syntax fix --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b42adae..4dbc85c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -59,7 +59,7 @@ jobs: CIBW_ARCHS: ${{ matrix.buildplat.arch }} CIBW_ENVIRONMENT: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" CIBW_BEFORE_ALL_MACOS: > - brew install boost + brew install boost && brew tap fbkarsdorp/homebrew-lamachine && brew install timbl From 0063677fcf6d681b21b2b3b495d1c42c71c6da16 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:47:34 +0100 Subject: [PATCH 45/81] build-deps.sh: removed unnecessary dependencies --- build-deps.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build-deps.sh b/build-deps.sh index 1bfd377..cd8578d 100755 --- a/build-deps.sh +++ b/build-deps.sh @@ -28,7 +28,6 @@ if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then export ACLOCAL_PATH=/usr/share/aclocal fi if [ "$VERSION_ID" = "7" ]; then - yum install -y libexttextcat-devel if [ -d /opt/rh/devtoolset-10/root/usr/lib ]; then #we are running in the manylinux2014 image export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/rh/devtoolset-10/root/usr/lib @@ -40,10 +39,6 @@ if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then cd libxml2-2.9.14 && ./configure --prefix=$PREFIX --without-python && make && make install cd .. fi - elif [ "$VERSION_ID" = "8" ]; then - #they forgot to package libexttextcat-devel? 
grab one manually: - wget https://github.com/proycon/LaMachine/raw/master/deps/centos8/libexttextcat-devel-3.4.5-2.el8.x86_64.rpm - yum install -y libexttextcat-devel-3.4.5-2.el8.x86_64.rpm fi fi From 8727aa0ad2df82fb9b6a42cda93e824f5e77c280 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Mon, 28 Oct 2024 17:49:58 +0100 Subject: [PATCH 46/81] ci: install boost dependency --- .github/workflows/wheels.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 4dbc85c..b99378f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -39,12 +39,12 @@ jobs: CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: > if command -v apt-get; then - apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make + apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make libboost-dev elif command -v yum; then - yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget + yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel #on CentOS 7 we also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise elif command -v apk; then - apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync boost-dev && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ fi && ./build-deps.sh From 44694a3ee2bb0e3b94f0d010c20618da6c0a85ce Mon Sep 17 00:00:00 2001 From: 
Maarten van Gompel Date: Tue, 29 Oct 2024 11:59:30 +0100 Subject: [PATCH 47/81] ci: attempted fix for mac --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b99378f..f02187e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -59,7 +59,7 @@ jobs: CIBW_ARCHS: ${{ matrix.buildplat.arch }} CIBW_ENVIRONMENT: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" CIBW_BEFORE_ALL_MACOS: > - brew install boost && + brew install boost icu4c && brew tap fbkarsdorp/homebrew-lamachine && brew install timbl From 96dc19a8c3df099aa2e59dd07a028b7ff95f8f5a Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 12:39:27 +0100 Subject: [PATCH 48/81] overhauled setup.py --- .github/workflows/wheels.yml | 2 +- setup.py | 144 +++++++++++++++-------------------- 2 files changed, 61 insertions(+), 85 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index f02187e..109d663 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -59,7 +59,7 @@ jobs: CIBW_ARCHS: ${{ matrix.buildplat.arch }} CIBW_ENVIRONMENT: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" CIBW_BEFORE_ALL_MACOS: > - brew install boost icu4c && + brew install boost boost-python3 icu4c && brew tap fbkarsdorp/homebrew-lamachine && brew install timbl diff --git a/setup.py b/setup.py index 183507a..333cd93 100755 --- a/setup.py +++ b/setup.py @@ -28,67 +28,65 @@ def updateDocHeader(input, output): stream.close() +includedirs = [] +libdirs = [] +print(f"system={platform.system()} machine={platform.machine()}", file=sys.stderr) +if platform.system() == "Darwin": + #we are running on Mac OS X (with homebrew hopefully), stuff is in specific locations: + if platform.machine().lower() == "arm64": + print("(macos arm64 detected)", file=sys.stderr) + libdirs.append("/opt/homebrew/lib") + 
includedirs.append("/opt/homebrew/include") + libdirs.append("/opt/homebrew/icu4c/lib") + includedirs.append("/opt/homebrew/icu4c/include") + libdirs.append("/opt/homebrew/libxml2/lib") + includedirs.append("/opt/homebrew/libxml2/include") + includedirs.append("/opt/homebrew/libxml2/include/libxml2") + libdirs.append("/opt/homebrew/opt/icu4c/lib") + includedirs.append("/opt/homebrew/opt/icu4c/include") + libdirs.append("/opt/homebrew/opt/libxml2/lib") + includedirs.append("/opt/homebrew/opt/libxml2/include") + libdirs.append("/opt/homebrew/opt/boost-python3/lib") + includedirs.append("/opt/homebrew/opt/boost-python3/include") + libdirs.append("/opt/homebrew/opt/boost/lib") + includedirs.append("/opt/homebrew/opt/boost/include") + else: + #we are running on Mac OS X with homebrew, stuff is in specific locations: + libdirs.append("/usr/local/opt/icu4c/lib") + includedirs.append("/usr/local/opt/icu4c/include") + libdirs.append("/usr/local/opt/libxml2/lib") + includedirs.append("/usr/local/opt/libxml2/include") + includedirs.append("/usr/local/opt/libxml2/include/libxml2") + libdirs.append("/usr/local/opt/boost-python3/lib") + includedirs.append("/usr/local/opt/boost-python3/lib") + libdirs.append("/usr/local/opt/boost/lib") + includedirs.append("/usr/local/opt/boost/include") + +#add some common default paths +includedirs += ['/usr/include/', '/usr/include/libxml2','/usr/local/include/' ] +libdirs += ['/usr/lib','/usr/local/lib'] +if 'VIRTUAL_ENV' in os.environ: + includedirs.insert(0,os.environ['VIRTUAL_ENV'] + '/include') + libdirs.insert(0,os.environ['VIRTUAL_ENV'] + '/lib') +if 'INCLUDE_DIRS' in os.environ: + includedirs = list(os.environ['INCLUDE_DIRS'].split(':')) + includedirs +if 'LIBRARY_DIRS' in os.environ: + libdirs = list(os.environ['LIBRARY_DIRS'].split(':')) + libdirs + +if platform.system() == "Darwin": + extra_options = ["--stdlib=libc++",'-D U_USING_ICU_NAMESPACE=1'] +else: + extra_options = ['-D U_USING_ICU_NAMESPACE=1'] + +print(f"include_dirs={' 
'.join(includedirs)} library_dirs={' '.join(libdirs)} extra_options={' '.join(extra_options)}", file=sys.stderr) class BuildExt(build_ext): - - user_options = build_ext.user_options + [ - ("boost-include-dir=", None, "directory for boost header files"), - ("boost-library-dir=", None, "directory for boost library files"), - ("timbl-include-dir=", None, "directory for TiMBL files"), - ("timbl-library-dir=", None, "directory for TiMBL library files"), - ("libxml2-include-dir=", None, "directory for LibXML2 files"), - ("libxml2-library-dir=", None, "directory for LibXML2 library files"), - ("static-boost-python3", "s", "statically link boost-python")] - - boolean_options = build_ext.boolean_options + [ - "static-boost-python3"] - - def initialize_options(self): build_ext.initialize_options(self) pyversion = sys.version.split(" ")[0] pyversion = pyversion.split(".")[0] + pyversion.split(".")[1] #returns something like 312 for 3.12 - libsearch = ['/usr/lib', '/usr/lib64', '/usr/lib/' + platform.machine() + '-' + platform.system().lower() + '-gnu', '/usr/lib/x86_64-linux-gnu/', '/usr/local/lib', '/usr/local/lib64'] - includesearch = ['/usr/include', '/usr/local/include'] - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/lib'): - libsearch.insert(0, os.environ['VIRTUAL_ENV'] + '/lib') - if 'VIRTUAL_ENV' in os.environ and os.path.exists(os.environ['VIRTUAL_ENV'] + '/include'): - includesearch.insert(0, os.environ['VIRTUAL_ENV'] + '/include') - #Find boost - self.findboost(libsearch, includesearch, pyversion) - - #Find libxml2 - if os.path.exists('/usr/local/Cellar/libxml2'): - #Mac OS X with homebrew - versiondirs = [] - for d in glob.glob('/usr/local/Cellar/libxml2/*'): - if os.path.isdir(d) and d[0] != '.': - versiondirs.append(os.path.basename(d)) - if versiondirs: - versiondirs.sort() - version = versiondirs[0] - libsearch.insert(0,'/usr/local/Cellar/libxml2/' + version + '/lib') - includesearch.insert(0,'/usr/local/Cellar/libxml2/' + 
version + '/include') - - for d in includesearch: - if os.path.exists(d + '/libxml2'): - self.libxml2_include_dir = d + '/libxml2' - self.libxml2_library_dir = d.replace('include','lib') - break - - #Find timbl - self.timbl_library_dir = None - for d in includesearch: - if os.path.exists(d + '/timbl'): - self.timbl_include_dir = d - self.timbl_library_dir = d.replace('include','lib') - break - - if self.timbl_library_dir is None: - raise Exception("Timbl not found, make sure to install Timbl and set --timbl-include-dir and --timbl-library-dir appropriately...") - - self.static_boost_python = False + self.findboost(libdirs, includedirs, pyversion) def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None @@ -99,6 +97,11 @@ def findboost(self, libsearch, includesearch, pyversion): libsearch.insert(0,'/usr/local/opt/boost-python3/lib') libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') + if os.path.exists('/opt/homebrew/boost-python3'): + libsearch.insert(0,'/opt/homebrew/boost-python3/lib') + libsearch.insert(0,'/opt/homebrew/boost/lib') + includesearch.insert(0,'/opt/homebrew/boost-python3/include') + includesearch.insert(0,'/opt/homebrew/boost/include') for d in libsearch: if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): @@ -147,45 +150,19 @@ def findboost(self, libsearch, includesearch, pyversion): print("Unable to find boost headers automatically. Is libboost-python-dev installed? 
Set --boost-include-dir",file=sys.stderr) self.boost_include_dir = includesearch[0] - def finalize_options(self): - build_ext.finalize_options(self) - self.ensure_file_exists("boost_include_dir", "boost/python.hpp") - self.ensure_dirname("boost_library_dir") - self.ensure_file_exists("timbl_include_dir", "timbl/TimblAPI.h") - self.ensure_dirname("timbl_library_dir") - self.ensure_file_exists("libxml2_include_dir", "libxml/tree.h") - self.ensure_dirname("libxml2_library_dir") - - def ensure_file_exists(self, option, filename): - self.ensure_dirname(option) - self._ensure_tested_string( - option, - lambda d: os.path.isfile(os.path.join(d, filename)), - "directory name", - "'%s' was not found in '%%s'" % filename) - def build_extensions(self): if newer("src/docstrings.h.in", "src/docstrings.h"): updateDocHeader("src/docstrings.h.in", "src/docstrings.h") for ext in self.extensions: - ext.include_dirs.append(self.boost_include_dir) - ext.include_dirs.append(self.timbl_include_dir) - ext.include_dirs.append(self.libxml2_include_dir) - ext.library_dirs.append(self.timbl_library_dir) - ext.library_dirs.append(self.boost_library_dir) - ext.library_dirs.append(self.libxml2_library_dir) + ext.include_dirs += includedirs + ext.library_dirs += libdirs compile_args = ["-std=c++17"] if platform.system() == "Darwin": compile_args.append("-stdlib=libc++") ext.extra_compile_args.extend(compile_args) - if isinstance(self.compiler, UnixCCompiler) and self.static_boost_python: - ext.extra_link_args.extend( - "-Wl,-Bstatic -l" + self.boostlib + " -Wl,-Bdynamic".split()) - else: - ext.libraries.append(self.boostlib) - + ext.libraries.append(self.boostlib) build_ext.build_extensions(self) @@ -194,7 +171,6 @@ def build_extensions(self): libraries=["timbl"], depends=["src/timblapi.h", "src/docstrings.h"]) - setup( name="python3-timbl", version="2024.10.28", From 9f6fa0f3897c8a9a30402f43746a0fd5c7eebdc8 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 12:53:09 +0100 
Subject: [PATCH 49/81] find boost --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 333cd93..6e1dbeb 100755 --- a/setup.py +++ b/setup.py @@ -94,10 +94,12 @@ def findboost(self, libsearch, includesearch, pyversion): self.boostlib = "boost_python" if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew + self.boostlib = "boost_python3" libsearch.insert(0,'/usr/local/opt/boost-python3/lib') libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') if os.path.exists('/opt/homebrew/boost-python3'): + self.boostlib = "boost_python3" libsearch.insert(0,'/opt/homebrew/boost-python3/lib') libsearch.insert(0,'/opt/homebrew/boost/lib') includesearch.insert(0,'/opt/homebrew/boost-python3/include') From f64c5a3d74de5a122cd9385d1d112d72e864390b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:01:01 +0100 Subject: [PATCH 50/81] ci: restrict mac wheels to py 3.12 only --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 109d663..1c71028 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -52,7 +52,7 @@ jobs: CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (macOS) - if: ${{ runner.os == 'macOS' && runner.python != '3.8' }} + if: ${{ runner.os == 'macOS' && runner.python == '3.12' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -64,7 +64,7 @@ jobs: brew install timbl - uses: actions/upload-artifact@v4 - if: ${{ ! (runner.os == 'macOS' && runner.python == '3.8') }} + if: ${{ ! 
(runner.os == 'macOS' && runner.python != '3.12') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From 221c706d3ee8dbdfb5b8606e1642660d3bae949e Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:22:07 +0100 Subject: [PATCH 51/81] ci: skip python 3.13 for now --- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1c71028..dd4553e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -18,7 +18,6 @@ jobs: - { cp: "cp310", rel: "3.10" } - { cp: "cp311", rel: "3.11" } - { cp: "cp312", rel: "3.12" } - - { cp: "cp313", rel: "3.13" } steps: - uses: actions/checkout@v4.1.1 From 1c54eb3d1fd4b05aec98b3ad51dba6f12e98f550 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:34:45 +0100 Subject: [PATCH 52/81] version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6e1dbeb..422ce8e 100755 --- a/setup.py +++ b/setup.py @@ -175,7 +175,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2024.10.28", + version="2024.10.29", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 130480f0547c6ad499c5af0a4fbb83d6993dc028 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:46:44 +0100 Subject: [PATCH 53/81] ci: drop older python and macos 12 --- .github/workflows/wheels.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index dd4553e..e548874 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -10,11 +10,8 @@ jobs: matrix: buildplat: - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } - - { 
runs_on: macos-12, sys: macosx, arch: x86_64, benv: "12.0" } - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } python: - - { cp: "cp38", rel: "3.8" } - - { cp: "cp39", rel: "3.9" } - { cp: "cp310", rel: "3.10" } - { cp: "cp311", rel: "3.11" } - { cp: "cp312", rel: "3.12" } @@ -51,7 +48,7 @@ jobs: CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (macOS) - if: ${{ runner.os == 'macOS' && runner.python == '3.12' }} + if: ${{ runner.os == 'macOS' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -63,7 +60,6 @@ jobs: brew install timbl - uses: actions/upload-artifact@v4 - if: ${{ ! (runner.os == 'macOS' && runner.python != '3.12') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From 235fae00b89f3aab4480e32d73b5ce672759eeb2 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:50:49 +0100 Subject: [PATCH 54/81] ci: use python 3.12 for cibuildwheel --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e548874..f45ddc8 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -22,7 +22,7 @@ jobs: # Used to host cibuildwheel - uses: actions/setup-python@v5 with: - python-version: 3.11 + python-version: 3.12 - name: Install cibuildwheel run: python -m pip install cibuildwheel From 16804d896bb010f6bc5822727c48ba43a8255f8f Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 13:56:21 +0100 Subject: [PATCH 55/81] ci: test for mac --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index f45ddc8..a7bd7b1 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -55,7 +55,7 @@ jobs: CIBW_ARCHS: ${{ 
matrix.buildplat.arch }} CIBW_ENVIRONMENT: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.buildplat.benv }}" CIBW_BEFORE_ALL_MACOS: > - brew install boost boost-python3 icu4c && + brew install boost boost-python3 && brew tap fbkarsdorp/homebrew-lamachine && brew install timbl From 09f6981c5925d98a05877696d25c1bb2b1c6927e Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 15:02:40 +0100 Subject: [PATCH 56/81] fix for finding boost-python3 on arm mac #5 --- setup.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 422ce8e..bac7905 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,6 @@ def updateDocHeader(input, output): libdirs.append("/opt/homebrew/opt/libxml2/lib") includedirs.append("/opt/homebrew/opt/libxml2/include") libdirs.append("/opt/homebrew/opt/boost-python3/lib") - includedirs.append("/opt/homebrew/opt/boost-python3/include") libdirs.append("/opt/homebrew/opt/boost/lib") includedirs.append("/opt/homebrew/opt/boost/include") else: @@ -98,12 +97,11 @@ def findboost(self, libsearch, includesearch, pyversion): libsearch.insert(0,'/usr/local/opt/boost-python3/lib') libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') - if os.path.exists('/opt/homebrew/boost-python3'): + if os.path.exists('/opt/homebrew/opt/boost-python3'): self.boostlib = "boost_python3" - libsearch.insert(0,'/opt/homebrew/boost-python3/lib') - libsearch.insert(0,'/opt/homebrew/boost/lib') - includesearch.insert(0,'/opt/homebrew/boost-python3/include') - includesearch.insert(0,'/opt/homebrew/boost/include') + libsearch.insert(0,'/opt/homebrew/opt/boost-python3/lib') + libsearch.insert(0,'/opt/homebrew/opt/boost/lib') + includesearch.insert(0,'/opt/homebrew/opt/boost/include') for d in libsearch: if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): @@ -144,12 +142,12 @@ def findboost(self, libsearch, includesearch, pyversion): if self.boost_library_dir is not None: 
print("Detected boost library in " + self.boost_library_dir + " (" + self.boostlib +")",file=sys.stderr) else: - print("Unable to find boost library directory automatically. Is libboost-python3 installed? Set --boost-library-dir?",file=sys.stderr) + print("Unable to find boost library directory automatically. Is libboost-python3 installed?",file=sys.stderr) self.boost_library_dir = libsearch[0] if self.boost_include_dir is not None: print("Detected boost headers in " + self.boost_include_dir ,file=sys.stderr) else: - print("Unable to find boost headers automatically. Is libboost-python-dev installed? Set --boost-include-dir",file=sys.stderr) + print("Unable to find boost headers automatically. Is libboost-python-dev installed?",file=sys.stderr) self.boost_include_dir = includesearch[0] def build_extensions(self): From 4426ac4b9b3e4653d8bb242ea9de9cca2c60bc5a Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 29 Oct 2024 15:22:27 +0100 Subject: [PATCH 57/81] further attempt to fix building on mac arm #5 --- setup.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index bac7905..a53e5b5 100755 --- a/setup.py +++ b/setup.py @@ -82,23 +82,22 @@ def updateDocHeader(input, output): class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) - pyversion = sys.version.split(" ")[0] - pyversion = pyversion.split(".")[0] + pyversion.split(".")[1] #returns something like 312 for 3.12 + #Find boost - self.findboost(libdirs, includedirs, pyversion) + self.findboost(libdirs, includedirs) - def findboost(self, libsearch, includesearch, pyversion): + def findboost(self, libsearch, includesearch): + pyversion = sys.version.split(" ")[0] + pyversion = pyversion.split(".")[0] + pyversion.split(".")[1] #returns something like 312 for 3.12 self.boost_library_dir = None self.boost_include_dir = None - self.boostlib = "boost_python" + self.boostlib = "boost_python" + pyversion if 
os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew - self.boostlib = "boost_python3" libsearch.insert(0,'/usr/local/opt/boost-python3/lib') libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') if os.path.exists('/opt/homebrew/opt/boost-python3'): - self.boostlib = "boost_python3" libsearch.insert(0,'/opt/homebrew/opt/boost-python3/lib') libsearch.insert(0,'/opt/homebrew/opt/boost/lib') includesearch.insert(0,'/opt/homebrew/opt/boost/include') From 9417aff386b21b8be81cd7a05553cf98547acc35 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 30 Oct 2024 14:16:18 +0100 Subject: [PATCH 58/81] README: update --- README.rst | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/README.rst b/README.rst index b4d9a99..4a0f448 100644 --- a/README.rst +++ b/README.rst @@ -78,20 +78,7 @@ obtained through github:: and can then be built and installed with the following command:: - $ sudo python3 setup.py \ - build_ext --boost-include-dir=$BOOST_HEADERS \ - --boost-library-dir=$BOOST_LIBS \ - --timbl-include-dir=$TIMBL_HEADERS \ - --timbl-library-dir=$TIMBL_LIBS \ - install --prefix=/dir/to/install/in - -This is the verbose variant, if default locations are used then the following may suffice already:: - - $ sudo python setup3.py install - - -The ``--prefix`` option to the install command denotes the directory in which the module is to be installed. If you have the appropriate system permissions, you can leave out this option. The module will then be installed in the Python system tree. Otherwise, make sure that the installation directory is in the module search path of your Python -system. + $ pip install . 
Usage ======= From 33a8dd9a166fc9f134ec26fcd2a42e598456088d Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 3 Dec 2024 18:22:39 +0100 Subject: [PATCH 59/81] temporarily disable mac wheels (because they don't work anyway) --- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index a7bd7b1..9c9408c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -10,7 +10,6 @@ jobs: matrix: buildplat: - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } - - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } python: - { cp: "cp310", rel: "3.10" } - { cp: "cp311", rel: "3.11" } From 636be6c6d656645f831d8a6e2a04c3ccebb1af4d Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Thu, 5 Dec 2024 10:25:23 +0100 Subject: [PATCH 60/81] Revert "further attempt to fix building on mac arm #5" This reverts commit 4426ac4b9b3e4653d8bb242ea9de9cca2c60bc5a. --- setup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index a53e5b5..bac7905 100755 --- a/setup.py +++ b/setup.py @@ -82,22 +82,23 @@ def updateDocHeader(input, output): class BuildExt(build_ext): def initialize_options(self): build_ext.initialize_options(self) - - #Find boost - self.findboost(libdirs, includedirs) - - def findboost(self, libsearch, includesearch): pyversion = sys.version.split(" ")[0] pyversion = pyversion.split(".")[0] + pyversion.split(".")[1] #returns something like 312 for 3.12 + #Find boost + self.findboost(libdirs, includedirs, pyversion) + + def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = None self.boost_include_dir = None - self.boostlib = "boost_python" + pyversion + self.boostlib = "boost_python" if os.path.exists('/usr/local/opt/boost-python3'): #Mac OS X with homebrew + self.boostlib = "boost_python3" libsearch.insert(0,'/usr/local/opt/boost-python3/lib') 
libsearch.insert(0,'/usr/local/opt/boost/lib') includesearch.insert(0,'/usr/local/opt/boost/include') if os.path.exists('/opt/homebrew/opt/boost-python3'): + self.boostlib = "boost_python3" libsearch.insert(0,'/opt/homebrew/opt/boost-python3/lib') libsearch.insert(0,'/opt/homebrew/opt/boost/lib') includesearch.insert(0,'/opt/homebrew/opt/boost/include') From cb172c1d0718ade26149500a1fcd8aea88d0aebb Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 17 Dec 2024 13:55:44 +0100 Subject: [PATCH 61/81] ci: wheel building --- .github/workflows/wheels.yml | 21 +++++++++++++++++++-- build-deps.sh | 8 ++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9c9408c..6465d1e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -10,10 +10,13 @@ jobs: matrix: buildplat: - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } + - { runs_on: ubuntu-20.04, sys: musllinux, arch: x86_64, benv: "" } + - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } python: - { cp: "cp310", rel: "3.10" } - { cp: "cp311", rel: "3.11" } - { cp: "cp312", rel: "3.12" } + - { cp: "cp313", rel: "3.13" } steps: - uses: actions/checkout@v4.1.1 @@ -26,8 +29,8 @@ jobs: - name: Install cibuildwheel run: python -m pip install cibuildwheel - - name: Build wheels (Linux) - if: ${{ runner.os != 'macOS' }} + - name: Build wheels (Linux glibc) + if: ${{ matrix.buildplat.sys == 'manylinux' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -46,6 +49,20 @@ jobs: CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2014_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" + - name: Build wheels (Linux musl) + if: ${{ matrix.buildplat.sys == 'musllinux' }} + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* + CIBW_ARCHS_LINUX: 
"x86_64" + CIBW_BEFORE_ALL_LINUX: > + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && + mkdir -p /usr/local/share/aclocal/ && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ && + ./build-deps.sh + CIBW_MUSLLINUX_X86_64_IMAGE: quay.io/pypa/musllinux_1_1_x86_64 + CIBW_MUSLLINUX_AARCH64_IMAGE: quay.io/pypa/musllinux_1_1_aarch64 + CIBW_SKIP: "*-win* *-manylinux_i686 pp*" + - name: Build wheels (macOS) if: ${{ runner.os == 'macOS' }} run: python -m cibuildwheel --output-dir wheelhouse diff --git a/build-deps.sh b/build-deps.sh index cd8578d..2f2381a 100755 --- a/build-deps.sh +++ b/build-deps.sh @@ -7,6 +7,8 @@ set -e . /etc/os-release +echo "OS: $ID">&2 +echo "VERSION: $VERSION_ID">&2 get_latest_version() { #Finds the latest git tag or falls back to returning the git default branch (usually master or main) @@ -27,7 +29,8 @@ if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then #needed for manylinux_2_28 container which ships custom autoconf, possibly others too? export ACLOCAL_PATH=/usr/share/aclocal fi - if [ "$VERSION_ID" = "7" ]; then + case $VERSION_ID in + 7*) if [ -d /opt/rh/devtoolset-10/root/usr/lib ]; then #we are running in the manylinux2014 image export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/rh/devtoolset-10/root/usr/lib @@ -39,7 +42,8 @@ if [ "$ID" = "almalinux" ] || [ "$ID" = "centos" ] || [ "$ID" = "rhel" ]; then cd libxml2-2.9.14 && ./configure --prefix=$PREFIX --without-python && make && make install cd .. 
fi - fi + ;; + esac fi PWD="$(pwd)" From 243646f028b0152a65d0c5344503dad261eed802 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:06:04 +0100 Subject: [PATCH 62/81] expanded boost detection --- setup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup.py b/setup.py index bac7905..3f65bf3 100755 --- a/setup.py +++ b/setup.py @@ -102,6 +102,11 @@ def findboost(self, libsearch, includesearch, pyversion): libsearch.insert(0,'/opt/homebrew/opt/boost-python3/lib') libsearch.insert(0,'/opt/homebrew/opt/boost/lib') includesearch.insert(0,'/opt/homebrew/opt/boost/include') + if os.path.exists('/opt/homebrew/opt/boost-python' + pyversion): + self.boostlib = "boost_python" + pyversion + libsearch.insert(0,f"/opt/homebrew/opt/boost-python{pyversion}/lib") + libsearch.insert(0,'/opt/homebrew/opt/boost/lib') + includesearch.insert(0,'/opt/homebrew/opt/boost/include') for d in libsearch: if os.path.exists(d + "/libboost_python-py"+pyversion+".so"): @@ -121,6 +126,10 @@ def findboost(self, libsearch, includesearch, pyversion): self.boost_library_dir = d self.boostlib = "boost_python" break + elif os.path.exists(d + "/libboost_python-py" + pyversion + ".dylib"): #Mac OS X + self.boost_library_dir = d + self.boostlib = "boost_python-py" + pyversion + break elif os.path.exists(d + "/libboost_python" + pyversion + ".dylib"): #Mac OS X self.boost_library_dir = d self.boostlib = "boost_python" + pyversion From 718d43cef7250e3b3090abf4147214237b0fff3b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:12:38 +0100 Subject: [PATCH 63/81] debug --- .github/workflows/wheels.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6465d1e..270928b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -73,7 +73,8 @@ jobs: CIBW_BEFORE_ALL_MACOS: > brew install boost boost-python3 && brew tap fbkarsdorp/homebrew-lamachine 
&& - brew install timbl + brew install timbl && + du -ah /opt/homebrew - uses: actions/upload-artifact@v4 with: From e3e1fc3749ba42853209abf5d02fa55b9b241f1b Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:25:53 +0100 Subject: [PATCH 64/81] ci: restrict mac wheels to python 3.12 only --- .github/workflows/wheels.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 270928b..8d8d8f4 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -64,7 +64,7 @@ jobs: CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (macOS) - if: ${{ runner.os == 'macOS' }} + if: ${{ runner.os == 'macOS' && matrix.python.cp == 'cp312' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -74,9 +74,10 @@ jobs: brew install boost boost-python3 && brew tap fbkarsdorp/homebrew-lamachine && brew install timbl && - du -ah /opt/homebrew + du -ah /opt/homebrew | grep boost_python - uses: actions/upload-artifact@v4 + if: ${{ ! 
(runner.os == 'macOS' && matrix.python.cp == 'cp312') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From b1bdbfc7120ea09d9d07c22444031d48b99b44c0 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:31:14 +0100 Subject: [PATCH 65/81] version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3f65bf3..b234d8a 100755 --- a/setup.py +++ b/setup.py @@ -182,7 +182,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2024.10.29", + version="2024.12.18", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 17f186ecfdb96eebc5b13c7f87b8cc92f9c0f963 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:36:27 +0100 Subject: [PATCH 66/81] README update --- README.rst | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/README.rst b/README.rst index 4a0f448..b4c7039 100644 --- a/README.rst +++ b/README.rst @@ -38,47 +38,30 @@ TiMBL. Installation ============ -python-timbl is distributed as part of **LaMachine** -(https://proycon.github.io/LaMachine), which significantly simplifies -compilation and installation. The remainder of the instructions in this section -refer to manual compilation and installation. +In a Python virtual environment, run: + +``` +pip install python3-timbl +``` + +If no wheels (binary packages) are available for your system, then this will +attempt to compile from source. If that is the case, a number of dependencies +are required: python-timbl depends on two external packages, which must have been built and/or installed on your system in order to successfully build python-timbl. 
The first is TiMBL itself; download its tarball from TiMBL's homepage and -follow the installation instructions, recent Ubuntu/Debian users will find -timbl in their distribution's package repository. In the remainder of this -section, it is assumed that ``$TIMBL_HEADERS`` points to the directory that -contains ``timbl/TimblAPI.h``, and ``$TIMBL_LIBS`` the directory that has -contains the Timbl libraries. Note that Timbl itself depends on additional -dependencies. - -The second prerequisite is Boost.Python, a library that facilitates writing +follow the installation instructions. The second prerequisite is Boost.Python, a library that facilitates writing Python extension modules in C++. Many Linux distributions come with prebuilt packages of Boost.Python. If so, install this package; on Ubuntu/Debian this can be done as follows:: $ sudo apt-get install libboost-python libboost-python-dev -If not, refer to the `Boost installation instructions`_ to build and install -Boost.Python manually. In the remainder of this section, let ``$BOOST_HEADERS`` -refer to the directory that contains the Boost header files, and -``$BOOST_LIBS`` to the directory that contains the Boost library files. If you -installed Boost.Python with your distribution's package manager, these -directories are probably ``/usr/include`` and ``/usr/lib`` respectively. - -.. _Boost installation instructions: http://www.boost.org/more/getting_started.html - - -If both prerequisites have been installed on your system, python-timbl can be -obtained through github:: - - $ git clone git://github.com/proycon/python-timbl.git - $ cd python-timbl - -and can then be built and installed with the following command:: +Note that on macOS, wheel packages are currently only available for the Python +3.12, as this the the Python version Homebrew uses in linking libboost-python. +Make sure you use that version. - $ pip install . 
Usage ======= From 1d6c88ae81a4c1f7f5a56152fd24ac8fb82565a4 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:40:04 +0100 Subject: [PATCH 67/81] ci: musllinux fix --- .github/workflows/wheels.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8d8d8f4..a756e07 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -41,9 +41,6 @@ jobs: elif command -v yum; then yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel #on CentOS 7 we also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise - elif command -v apk; then - apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync boost-dev && - rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ fi && ./build-deps.sh CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2014_x86_64 @@ -56,7 +53,7 @@ jobs: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: > - apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev libexttextcat-dev libtool rsync && + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev boost-dev boost1.84-python3 libtool rsync && mkdir -p /usr/local/share/aclocal/ && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ && ./build-deps.sh CIBW_MUSLLINUX_X86_64_IMAGE: quay.io/pypa/musllinux_1_1_x86_64 From 34da0f027e620828f99e8aeeedc7464d817045a0 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:43:00 +0100 Subject: [PATCH 68/81] fixup --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index a756e07..eb73075 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -53,7 +53,7 @@ jobs: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: > - apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev boost-dev boost1.84-python3 libtool rsync && + apk add build-base git autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev boost-dev boost-python3 libtool rsync && mkdir -p /usr/local/share/aclocal/ && rsync -av --ignore-existing /usr/share/aclocal/*.m4 /usr/local/share/aclocal/ && ./build-deps.sh CIBW_MUSLLINUX_X86_64_IMAGE: quay.io/pypa/musllinux_1_1_x86_64 From 4606fc6938ba08f574a42adf5d5fa46e4ff5692f Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:50:09 +0100 Subject: [PATCH 69/81] ci: no musllinux for now --- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index eb73075..50e25ae 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -10,7 +10,6 @@ jobs: matrix: buildplat: - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } - - { runs_on: ubuntu-20.04, sys: musllinux, arch: x86_64, benv: "" } - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } python: - { cp: "cp310", rel: "3.10" } From b193f1d8130bf1b4ac4d816fef25d00b1935b9f6 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 12:58:30 +0100 Subject: [PATCH 70/81] ci: update base container for manylinux --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 50e25ae..5a1aa2b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -42,7 +42,7 @@ jobs: #on CentOS 7 we 
also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise fi && ./build-deps.sh - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2014_x86_64 + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2_28_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (Linux musl) From b6da48fec68de790afb5b94eab350f49cd2d7fb1 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 18 Dec 2024 13:00:22 +0100 Subject: [PATCH 71/81] fixup --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 5a1aa2b..310b3c5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -42,7 +42,7 @@ jobs: #on CentOS 7 we also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise fi && ./build-deps.sh - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux2_28_x86_64 + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (Linux musl) From c60f36ac0322f08a3fca48ebce84a3f3521d4e29 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 21 Jan 2025 23:44:57 +0100 Subject: [PATCH 72/81] attempted wheel fix, some wheels were linked against python 2.7 for some reason! 
--- .github/workflows/wheels.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 310b3c5..51c1630 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -38,8 +38,7 @@ jobs: if command -v apt-get; then apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make libboost-dev elif command -v yum; then - yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel - #on CentOS 7 we also have libtar-devel which will be installed by build-deps.sh, on 8 they are missing and will be installed from source or otherwise + yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel boost-python3 boost-python3-devel fi && ./build-deps.sh CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 From b01646b6e5071eaedc23c55e2e441fe053abe40d Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 21 Jan 2025 23:53:37 +0100 Subject: [PATCH 73/81] ci: mac build now only compiles against 3.13 because homebrew's libboost links against that --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 51c1630..5a9e383 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -59,7 +59,7 @@ jobs: CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (macOS) - if: ${{ runner.os == 'macOS' && matrix.python.cp == 'cp312' }} + if: ${{ runner.os == 'macOS' && matrix.python.cp == 'cp313' }} run: python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.python.cp }}-${{ matrix.buildplat.sys }}* @@ -72,7 +72,7 @@ jobs: du -ah /opt/homebrew | grep boost_python - uses: 
actions/upload-artifact@v4 - if: ${{ ! (runner.os == 'macOS' && matrix.python.cp == 'cp312') }} + if: ${{ ! (runner.os == 'macOS' && matrix.python.cp == 'cp313') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From f5374d7402a69fe50e0034b73d0153f0d0ebc92c Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 22 Jan 2025 00:09:11 +0100 Subject: [PATCH 74/81] ci wheels: try building against glibc >= 2.34 (almalinux 9), otherwise boost-python3 is too old --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 5a9e383..ab2acbb 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -38,10 +38,10 @@ jobs: if command -v apt-get; then apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make libboost-dev elif command -v yum; then - yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel boost-python3 boost-python3-devel + yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel fi && ./build-deps.sh - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_34_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (Linux musl) From 71bbd5fbc6d51a1606b78da434b83762714c942c Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 22 Jan 2025 12:33:39 +0100 Subject: [PATCH 75/81] ci wheels: build boost-python from source --- .github/workflows/wheels.yml | 5 +++-- README.rst | 9 ++++----- build-boost-python.sh | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) create mode 100755 build-boost-python.sh diff 
--git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index ab2acbb..ca56e43 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -38,10 +38,11 @@ jobs: if command -v apt-get; then apt-get -y git libicu-dev libxml2-dev libxslt1-dev libbz2-dev zlib1g-dev autoconf automake autoconf-archive libtool autotools-dev gcc g++ make libboost-dev elif command -v yum; then - yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget boost-devel + yum install -y git libicu-devel libxml2-devel libxslt-devel zlib-devel bzip2-devel libtool autoconf-archive autoconf automake m4 wget cmake fi && ./build-deps.sh - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_34_x86_64 + CIBW_BEFORE_BUILD: ./build-boost-python.sh + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 CIBW_SKIP: "*-win* *-manylinux_i686 pp*" - name: Build wheels (Linux musl) diff --git a/README.rst b/README.rst index b4c7039..d1353b2 100644 --- a/README.rst +++ b/README.rst @@ -44,6 +44,9 @@ In a Python virtual environment, run: pip install python3-timbl ``` +Note that on macOS, wheel packages are currently only available for Python +3.13, as this is the Python version Homebrew uses in linking libboost-python. + If no wheels (binary packages) are available for your system, then this will attempt to compile from source. If that is the case, a number of dependencies are required: @@ -54,14 +57,10 @@ The first is TiMBL itself; download its tarball from TiMBL's homepage and follow the installation instructions. The second prerequisite is Boost.Python, a library that facilitates writing Python extension modules in C++. Many Linux distributions come with prebuilt packages of Boost.Python. If so, install this package; on Ubuntu/Debian this -can be done as follows:: +can be done as follows. 
$ sudo apt-get install libboost-python libboost-python-dev -Note that on macOS, wheel packages are currently only available for the Python -3.12, as this the the Python version Homebrew uses in linking libboost-python. -Make sure you use that version. - Usage ======= diff --git a/build-boost-python.sh b/build-boost-python.sh new file mode 100755 index 0000000..6bc6d4d --- /dev/null +++ b/build-boost-python.sh @@ -0,0 +1,18 @@ +#!/bin/sh + + +# build boost-python from source on AlmaLinux 8 in manylinux_2_28 container (do not use in other contexts) + +set -e + +#var gets set by cibuildwheel, assign to PYTHON_HOME for boost +export PYTHON_HOME=$Python_ROOT_DIR + +cd /tmp/ +wget -q https://github.com/boostorg/boost/releases/download/boost-1.87.0/boost-1.87.0-cmake.tar.gz +tar -xzf boost-1.87.0-cmake.tar.gz +cd boost-1.87.0 +./bootstrap.sh +./b2 --clean +./b2 install --with-python --prefix=/usr +cd $PREVPWD From cb6b78e4d6797657cadff75f002279d1701c19fc Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 22 Jan 2025 14:51:44 +0100 Subject: [PATCH 76/81] version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b234d8a..3d8881a 100755 --- a/setup.py +++ b/setup.py @@ -182,7 +182,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2024.12.18", + version="2025.01.22", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", From 5707c71c60c50d06cbb9ee1fb1da289a4c9be0ba Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 22 Jan 2025 14:52:54 +0100 Subject: [PATCH 77/81] ci: mac wheel is now python 3.13 only, adapt --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index ca56e43..3bfdcff 100644 --- a/.github/workflows/wheels.yml +++ 
b/.github/workflows/wheels.yml @@ -73,7 +73,7 @@ jobs: du -ah /opt/homebrew | grep boost_python - uses: actions/upload-artifact@v4 - if: ${{ ! (runner.os == 'macOS' && matrix.python.cp == 'cp313') }} + if: ${{ ! (runner.os == 'macOS' && matrix.python.cp != 'cp313') }} with: name: ${{matrix.python.cp}}-${{matrix.buildplat.sys}}-${{matrix.buildplat.arch}} path: ./wheelhouse/*.whl From 65f2b399c158a81ffa75b15d409c337660ba4576 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Fri, 2 May 2025 10:53:06 +0200 Subject: [PATCH 78/81] make bestNeighbours() method available in higher-level API --- example.py | 4 +++- setup.py | 2 +- timbl.py | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/example.py b/example.py index cb3edaf..2bc6a0c 100755 --- a/example.py +++ b/example.py @@ -53,7 +53,8 @@ os.unlink("testfile") -classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1" ) + +classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1 +v n+di+k" ) #add some extra verbosity flags classifier.load() classifier.addinstance("testfile", (1,0,0),'financial' ) #addinstance can be used to add instances to external files (use append() for training) classifier.addinstance("testfile", (0,1,0),'furniture' ) @@ -63,6 +64,7 @@ classifier.test("testfile") print("Accuracy: ", classifier.getAccuracy()) +print("Best neighbours: ", classifier.bestNeighbours()) #this only works with the extra verbosity flags and only if python-timbl is compiled with gcc diff --git a/setup.py b/setup.py index 3d8881a..687de05 100755 --- a/setup.py +++ b/setup.py @@ -182,7 +182,7 @@ def build_extensions(self): setup( name="python3-timbl", - version="2025.01.22", + version="2025.05.02", description="Python 3 language binding for the Tilburg Memory-Based Learner", author="Sander Canisius, Maarten van Gompel", author_email="S.V.M.Canisius@uvt.nl, proycon@anaproy.nl", diff --git a/timbl.py b/timbl.py index 604b377..786f8d2 100644 --- a/timbl.py +++ b/timbl.py @@ -307,6 +307,9 @@ def 
readtestoutput(self): yield " ".join(segments[:endfvec - 2]).split(self.delimiter), segments[endfvec - 2], segments[endfvec - 1], distribution, distance f.close() + def bestNeighbours(self): + return self.api.bestNeighbours() + def _parsedistribution(self, instance, start=0, end =None): dist = {} i = start + 1 From 7550c2c25cf64ac796de1ae44e46e9849da20e69 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Fri, 2 May 2025 10:55:07 +0200 Subject: [PATCH 79/81] README: removed badge --- README.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.rst b/README.rst index d1353b2..cd43574 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,3 @@ -.. image:: http://applejack.science.ru.nl/lamabadge.php/python-timbl - :target: http://applejack.science.ru.nl/languagemachines/ - .. image:: https://www.repostatus.org/badges/latest/active.svg :alt: Project Status: Active – The project has reached a stable, usable state and is being actively developed. :target: https://www.repostatus.org/#active From c0c96e626b46636219cc18e78cd39fd68772f4de Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Fri, 2 May 2025 10:57:33 +0200 Subject: [PATCH 80/81] ci: upgrade ubuntu --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 3bfdcff..9287a1d 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -9,7 +9,7 @@ jobs: strategy: matrix: buildplat: - - { runs_on: ubuntu-20.04, sys: manylinux, arch: x86_64, benv: "" } + - { runs_on: ubuntu-22.04, sys: manylinux, arch: x86_64, benv: "" } - { runs_on: macos-14, sys: macosx, arch: arm64, benv: "14.0" } python: - { cp: "cp310", rel: "3.10" } From 0ab2b55fc379304d3699b43d8429dad64613b4f4 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Fri, 2 May 2025 11:24:16 +0200 Subject: [PATCH 81/81] also propagate settings() and options() to higher-level API --- timbl.py | 9 +++++++++ 1 file changed, 9 
insertions(+) diff --git a/timbl.py b/timbl.py index 786f8d2..5623d94 100644 --- a/timbl.py +++ b/timbl.py @@ -310,6 +310,15 @@ def readtestoutput(self): def bestNeighbours(self): return self.api.bestNeighbours() + def bestNeighbors(self): + return self.api.bestNeighbours() + + def settings(self): + return self.api.settings() + + def options(self): + return self.api.options() + def _parsedistribution(self, instance, start=0, end =None): dist = {} i = start + 1