bibtool.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. import re
  2. import sys
  3. import requests
  4. import json
  5. import hashlib
  6. import bibtexparser
  7. from bibtexparser.bparser import BibTexParser
  8. import difflib
  9. import os
  10. import argparse
  11. version = 15000000
  12. limit_traffic = True
  13. parser = argparse.ArgumentParser(description='BibTool')
  14. parser.add_argument("--token", dest="token", action="store", default="", help="Provide access token via command line")
  15. parser.add_argument(
  16. "--tokenfile", dest="token_file", action="store", default="token", help="File containing the access token")
  17. parser.add_argument("--server", dest="server", action="store", default="", required=True, help="BibTool server")
  18. parser.add_argument("--tex", dest="tex", action="store", default="main.tex", help="LaTeX file")
  19. parser.add_argument(
  20. "--query", dest="query", action="store", default="", help="Query to search for (if action is search)")
  21. parser.add_argument(
  22. "--exclude_sub_dirs",
  23. dest="exclude_sub_dirs",
  24. default=False,
  25. action="store_true",
  26. help="Exclude folders from recursive search")
  27. parser.add_argument("action")
  28. args = parser.parse_args(sys.argv[1:])
  29. if args.token != "":
  30. token = args.token
  31. else:
  32. token = None
  33. try:
  34. token = open(args.token_file).read().strip()
  35. except:
  36. pass
  37. fname = args.tex
  38. server = args.server
  39. if server[-1] != '/':
  40. server += "/"
  41. if not server.endswith("/v1/"):
  42. server += "v1/"
  43. def get_keys(filename, exclude_sub_dirs, import_base=None):
  44. try:
  45. if not os.path.isfile(filename):
  46. filename += ".tex"
  47. content = open(filename).read()
  48. except:
  49. return []
  50. # extract cites
  51. keys = set()
  52. cites = re.findall("\\\\(no)?citeA?\\{([^\\}]+)\\}", content)
  53. for key in cites:
  54. keys |= set(key[1].split(","))
  55. # find inputs/include and recursively parse them
  56. inputs = re.findall("\\\\(?:input|include)\\{([^\\}]+)\\}", content)
  57. for f in inputs:
  58. if import_base is not None:
  59. f = os.path.join(import_base, f)
  60. if exclude_sub_dirs:
  61. if os.path.dirname(os.path.abspath(f)) == os.path.dirname(os.path.abspath(filename)):
  62. keys |= set(get_keys(f, exclude_sub_dirs))
  63. else:
  64. keys |= set(get_keys(f, exclude_sub_dirs))
  65. # find subimports and recursively parse them
  66. subimports = re.findall("\\\\subimport\*?\\{(.*)\\}\\{(.*)\\}", content)
  67. for f in subimports:
  68. filepath = os.path.join(f[0], f[1])
  69. if exclude_sub_dirs:
  70. if os.path.dirname(os.path.abspath(f)) == os.path.dirname(os.path.abspath(filename)):
  71. keys |= set(get_keys(filepath, exclude_sub_dirs, import_base=f[0]))
  72. else:
  73. keys |= set(get_keys(filepath, exclude_sub_dirs, import_base=f[0]))
  74. keys = sorted(list([k.strip() for k in keys]))
  75. keys = [k for k in keys if len(k) != 0]
  76. return keys
  77. def keys_have_changed(keys):
  78. new_keys = hashlib.sha256("\n".join(keys).encode("utf-8")).hexdigest()
  79. old_keys = ""
  80. try:
  81. old_keys = open("main.bib.keys.sha").read().strip()
  82. except:
  83. pass
  84. try:
  85. open("main.bib.keys.sha", "w").write(new_keys)
  86. except:
  87. pass
  88. return (new_keys != old_keys)
  89. def bib_has_changed(bib):
  90. new_bib = hashlib.sha256(bib.strip().encode("utf-8")).hexdigest()
  91. old_bib = ""
  92. try:
  93. old_bib = open("main.bib.sha").read().strip()
  94. except:
  95. pass
  96. save_bib_hash()
  97. return (new_bib != old_bib)
  98. def entry_by_key(key):
  99. for entry in bib_database.entries:
  100. if entry["ID"] == key:
  101. return entry
  102. return None
  103. def entry_to_bibtex(entry):
  104. newdb = bibtexparser.bibdatabase.BibDatabase()
  105. newdb.entries = [entry]
  106. return bibtexparser.dumps(newdb)
  107. def inline_diff(a, b):
  108. matcher = difflib.SequenceMatcher(None, a, b)
  109. def process_tag(tag, i1, i2, j1, j2):
  110. if tag == 'replace':
  111. return '\u001b[34m[' + matcher.a[i1:i2] + ' -> ' + matcher.b[j1:j2] + ']\u001b[0m'
  112. if tag == 'delete':
  113. return '\u001b[31m[- ' + matcher.a[i1:i2] + ']\u001b[0m'
  114. if tag == 'equal':
  115. return matcher.a[i1:i2]
  116. if tag == 'insert':
  117. return '\u001b[32m[+ ' + matcher.b[j1:j2] + ']\u001b[0m'
  118. return ''.join(process_tag(*t) for t in matcher.get_opcodes())
  119. def resolve_changes():
  120. print("Your options are")
  121. print(" update server version with local changes (L)")
  122. print(" replace local version with server version (S)")
  123. print(" ignore, do not apply any changes (I)")
  124. print(" abort without changes (A)")
  125. while True:
  126. action = input("Your choice [l/s/I/a]: ").lower()
  127. if action == "l" or action == "s" or action == "a" or action == "i":
  128. return action
  129. if not action or action == "":
  130. return "i"
  131. return None
  132. def resolve_duplicate():
  133. print("Your options are")
  134. print(" commit local changes to server (M)")
  135. print(" delete server entry (D)")
  136. print(" remove local entry (R)")
  137. print(" ignore, do not apply any changes (I)")
  138. print(" abort without changes (A)")
  139. while True:
  140. action = input("Your choice [m/d/r/I/a]: ").lower()
  141. if action == "m" or action == "a" or action == "i" or action == "d" or action == "r":
  142. return action
  143. if not action or action == "":
  144. return "i"
  145. return None
  146. def resolve_policy_reject():
  147. print("Your options are")
  148. print(" force entry write to the server (F)")
  149. print(" ignore, do not apply any changes (I)")
  150. print(" abort without changes (A)")
  151. while True:
  152. action = input("Your choice [f/I/a]: ").lower()
  153. if action == "f" or action == "i" or action == "a":
  154. return action
  155. if not action or action == "":
  156. return "i"
  157. return None
  158. def update_local_bib(key, new_entry):
  159. for (idx, entry) in enumerate(bib_database.entries):
  160. if entry["ID"] == key:
  161. bib_database.entries[idx] = new_entry
  162. break
  163. def update_remote_bib(key, new_entry):
  164. response = requests.put(server + "entry/%s" % key, json={"entry": new_entry, "token": token})
  165. if "success" in response.json() and not response.json()["success"]:
  166. show_error(response.json())
  167. def add_remote_bib(key, entry, force=False):
  168. if force:
  169. # do not rely on boolean encoding of `force`
  170. response = requests.post(server + "entry/%s" % key, json={"entry": entry, "token": token, "force": "true"})
  171. else:
  172. response = requests.post(server + "entry/%s" % key, json={"entry": entry, "token": token})
  173. if "success" in response.json() and not response.json()["success"]:
  174. show_error(response.json())
  175. def remove_remote_bib(key):
  176. response = requests.delete(server + "entry/%s%s" % (key, "/%s" % token if token else ""))
  177. if "success" in response.json() and not response.json()["success"]:
  178. show_error(response.json())
  179. def remove_local_bib(key):
  180. for (idx, entry) in enumerate(bib_database.entries):
  181. if entry["ID"] == key:
  182. del bib_database.entries[idx]
  183. save_bib()
  184. def save_bib_hash():
  185. try:
  186. bib = open("main.bib").read()
  187. open("main.bib.sha", "w").write(hashlib.sha256(bib.strip().encode("utf-8")).hexdigest())
  188. except:
  189. pass
  190. def save_bib():
  191. with open('main.bib', 'w') as bibtex_file:
  192. bibtexparser.dump(bib_database, bibtex_file)
  193. save_bib_hash()
  194. def show_error(obj):
  195. if "reason" in obj:
  196. if obj["reason"] == "access_denied":
  197. print(
  198. "\u001b[31m[!] Access denied!\u001b[0m Your token is not valid for this operation. Verify whether the file '%s' contains a valid token."
  199. % args.token_file)
  200. elif obj["reason"] == "policy":
  201. for entry in obj["entries"]:
  202. print("\u001b[31m[!] Server policy rejected entry %s\u001b[0m. Reason: %s" %
  203. (entry["ID"], entry["reason"]))
  204. else:
  205. print("\u001b[31m[!] Unhandled error occurred!\u001b[0m Reason (%s) %s" %
  206. (obj["reason"], obj["message"] if "message" in obj else ""))
  207. else:
  208. print("\u001b[31m[!] Unknown error occurred!\u001b[0m")
  209. sys.exit(1)
  210. action = args.action
  211. parser = BibTexParser(common_strings=True)
  212. parser.ignore_nonstandard_types = False
  213. parser.homogenize_fields = True
  214. if not os.path.exists("main.bib") or os.stat("main.bib").st_size == 0:
  215. bib_database = bibtexparser.loads("\n")
  216. else:
  217. try:
  218. with open('main.bib') as bibtex_file:
  219. bib_database = bibtexparser.load(bibtex_file, parser)
  220. #print(bib_database.entries)
  221. except Exception as e:
  222. print("Malformed bibliography file!\n")
  223. print(e)
  224. sys.exit(1)
  225. response = requests.get(server + "version")
  226. try:
  227. version_info = response.json()
  228. except:
  229. print("\u001b[31m[!] Could not get version info from server.\u001b[0m Is the server URL \"%s\" correct?" % server)
  230. sys.exit(1)
  231. if version_info["version"] > version:
  232. print("[!] New version available, updating...")
  233. script = requests.get(server + version_info["url"])
  234. with open(sys.argv[0], "w") as sc:
  235. sc.write(script.text)
  236. print("Restarting...")
  237. os.execl(sys.executable, *([sys.executable] + sys.argv))
  238. if action == "search":
  239. if len(args.query) < 3:
  240. print("Usage: %s search --query <query>" % sys.argv[0])
  241. sys.exit(1)
  242. response = requests.get(server + "search/" + args.query + ("/%s" % token if token else ""))
  243. print(response.text)
  244. elif action == "sync":
  245. response = requests.get(server + "sync")
  246. print(response.text)
  247. elif action == "get":
  248. keys = get_keys(args.tex, args.exclude_sub_dirs)
  249. print(keys)
  250. fetch = keys_have_changed(keys)
  251. try:
  252. current_bib = open("main.bib").read()
  253. update = bib_has_changed(current_bib)
  254. except:
  255. update = False
  256. fetch = True
  257. if update:
  258. fetch = True
  259. if not limit_traffic:
  260. update = True
  261. fetch = True
  262. #print("fetch %d, update %d\n" % (fetch, update))
  263. # update
  264. if update:
  265. response = requests.post(server + "update", json={"entries": bib_database.entries, "token": token})
  266. result = response.json()
  267. if not result["success"]:
  268. if result["reason"] == "policy":
  269. #print(result["entries"])
  270. for entry in result["entries"]:
  271. print("\n[!] Server policy rejected entry %s. Reason: %s" % (entry["ID"], entry["reason"]))
  272. action = resolve_policy_reject()
  273. if action == "i":
  274. pass
  275. elif action == "a":
  276. sys.exit(1)
  277. elif action == "f":
  278. add_remote_bib(entry["ID"], entry_by_key(entry["ID"]), force=True)
  279. elif result["reason"] == "duplicate":
  280. #print(result["entries"])
  281. for dup in result["entries"]:
  282. print("\n[!] There is already a similar entry for %s on the server (%s) [Levenshtein %d]" %
  283. (dup[1], dup[2]["ID"], dup[0]))
  284. print("- Local -")
  285. local = entry_to_bibtex(entry_by_key(dup[1]))
  286. remote = entry_to_bibtex(dup[2])
  287. print(local)
  288. print("- Server -")
  289. print(remote)
  290. print("- Diff - ")
  291. print(inline_diff(remote, local))
  292. if dup[1] != dup[2]["ID"]:
  293. # different key, similar entry
  294. action = resolve_duplicate()
  295. if action == "i":
  296. pass
  297. elif action == "a":
  298. sys.exit(1)
  299. elif action == "d":
  300. remove_remote_bib(dup[2]["ID"])
  301. elif action == "m":
  302. add_remote_bib(dup[1], entry_by_key(dup[1]))
  303. elif action == "r":
  304. remove_local_bib(dup[1])
  305. else:
  306. # same key
  307. action = resolve_changes()
  308. if action == "a":
  309. sys.exit(1)
  310. elif action == "i":
  311. pass
  312. elif action == "s":
  313. update_local_bib(dup[1], dup[2])
  314. save_bib()
  315. elif action == "l":
  316. update_remote_bib(dup[2]["ID"], entry_by_key(dup[1]))
  317. else:
  318. show_error(result)
  319. if fetch:
  320. response = requests.post(server + "get_json", json={"entries": keys, "token": token})
  321. bib = response.json()
  322. if "success" in bib and not bib["success"]:
  323. show_error(bib)
  324. else:
  325. # merge local and remote database
  326. for entry in bib:
  327. if entry and "ID" in entry and not entry_by_key(entry["ID"]):
  328. bib_database.entries.append(entry)
  329. save_bib()
  330. # suggest keys for unresolved keys
  331. for key in keys:
  332. if not entry_by_key(key) and not '#' in key:
  333. response = requests.get(server + "suggest/" + key + ("/%s" % (token if token else "")))
  334. suggest = response.json()
  335. if "success" in suggest and not suggest["success"]:
  336. show_error(suggest)
  337. else:
  338. print("Key '%s' not found%s %s" %
  339. (key, ", did you mean any of these?" if len(suggest["entries"]) > 0 else "", ", ".join(
  340. ["'%s'" % e[1]["ID"] for e in suggest["entries"]])))
  341. else:
  342. print("Unknown action '%s'" % action)