[v3] Support conjunctive keyword search in RPC interface

Message ID 20200706011906.7214-1-kevr.gtalk@gmail.com
State New
Headers show
Series [v3] Support conjunctive keyword search in RPC interface | expand

Commit Message

Kevin Morris July 6, 2020, 1:19 a.m. UTC
Newly supported API Version 6 modifies `type=search` for _by_ type
`name-desc`: it now behaves the same as `name-desc` search through the
https://aur.archlinux.org/packages/ search page.

Search for packages containing the literal keyword `blah blah` AND `haha`:
https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha

Search for packages containing the literal keyword `abc 123`:
https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"

The following example searches for packages that contain `blah` AND `abc`:
https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc

The legacy method still searches for packages that contain `blah abc`:
https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc

API Version 6 is currently only considered during a `search` of `name-desc`.

Note: This change was written as a solution to
https://bugs.archlinux.org/task/49133.

PS: + Some spacing issues fixed in comments.

Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com>
---
 doc/rpc.txt               |  4 ++++
 web/lib/aurjson.class.php | 33 +++++++++++++++++++++++----------
 web/lib/pkgfuncs.inc.php  | 36 +++++++++++++++++++++++-------------
 3 files changed, 50 insertions(+), 23 deletions(-)

Comments

Kevin Morris July 6, 2020, 1:20 a.m. UTC | #1
Alright, just left `name` as it exists in both cases, for rpc and web
search.

Should be all good now, hopefully?

On Sun, Jul 5, 2020 at 6:19 PM Kevin Morris <kevr.gtalk@gmail.com> wrote:

> Newly supported API Version 6 modifies `type=search` for _by_ type
> `name-desc`: it now behaves the same as `name-desc` search through the
> https://aur.archlinux.org/packages/ search page.
>
> Search for packages containing the literal keyword `blah blah` AND `haha`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
>
> Search for packages containing the literal keyword `abc 123`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
>
> The following example searches for packages that contain `blah` AND `abc`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
>
> The legacy method still searches for packages that contain `blah abc`:
> https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
> https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
>
> API Version 6 is currently only considered during a `search` of
> `name-desc`.
>
> Note: This change was written as a solution to
> https://bugs.archlinux.org/task/49133.
>
> PS: + Some spacing issues fixed in comments.
>
> Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com>
> ---
>  doc/rpc.txt               |  4 ++++
>  web/lib/aurjson.class.php | 33 +++++++++++++++++++++++----------
>  web/lib/pkgfuncs.inc.php  | 36 +++++++++++++++++++++++-------------
>  3 files changed, 50 insertions(+), 23 deletions(-)
>
> diff --git a/doc/rpc.txt b/doc/rpc.txt
> index 3148ebea..b0f5c4e1 100644
> --- a/doc/rpc.txt
> +++ b/doc/rpc.txt
> @@ -39,6 +39,10 @@ Examples
>    `/rpc/?v=5&type=search&by=makedepends&arg=boost`
>  `search` with callback::
>    `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103`
> +`search` with API Version 6 for packages containing `cookie` AND `milk`::
> +  `/rpc/?v=6&type=search&arg=cookie%20milk`
> +`search` with API Version 6 for packages containing `cookie milk`::
> +  `/rpc/?v=6&type=search&arg="cookie milk"`
>  `info`::
>    `/rpc/?v=5&type=info&arg[]=foobar`
>  `info` with multiple packages::
> diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php
> index 0ac586fe..b92e9094 100644
> --- a/web/lib/aurjson.class.php
> +++ b/web/lib/aurjson.class.php
> @@ -1,6 +1,7 @@
>  <?php
>
>  include_once("aur.inc.php");
> +include_once("pkgfuncs.inc.php");
>
>  /*
>   * This class defines a remote interface for fetching data from the AUR
> using
> @@ -80,7 +81,7 @@ class AurJSON {
>                 if (isset($http_data['v'])) {
>                         $this->version = intval($http_data['v']);
>                 }
> -               if ($this->version < 1 || $this->version > 5) {
> +               if ($this->version < 1 || $this->version > 6) {
>                         return $this->json_error('Invalid version
> specified.');
>                 }
>
> @@ -140,7 +141,7 @@ class AurJSON {
>         }
>
>         /*
> -        * Check if an IP needs to be  rate limited.
> +        * Check if an IP needs to be rate limited.
>          *
>          * @param $ip IP of the current request
>          *
> @@ -192,7 +193,7 @@ class AurJSON {
>                 $value = get_cache_value('ratelimit-ws:' . $ip, $status);
>                 if (!$status || ($status && $value < $deletion_time)) {
>                         if (set_cache_value('ratelimit-ws:' . $ip, $time,
> $window_length) &&
> -                           set_cache_value('ratelimit:' . $ip, 1,
> $window_length)) {
> +                               set_cache_value('ratelimit:' . $ip, 1,
> $window_length)) {
>                                 return;
>                         }
>                 } else {
> @@ -370,7 +371,7 @@ class AurJSON {
>                 } elseif ($this->version >= 2) {
>                         if ($this->version == 2 || $this->version == 3) {
>                                 $fields = implode(',', self::$fields_v2);
> -                       } else if ($this->version == 4 || $this->version
> == 5) {
> +                       } else if ($this->version >= 4 && $this->version
> <= 6) {
>                                 $fields = implode(',', self::$fields_v4);
>                         }
>                         $query = "SELECT {$fields} " .
> @@ -492,13 +493,25 @@ class AurJSON {
>                         if (strlen($keyword_string) < 2) {
>                                 return $this->json_error('Query arg too
> small.');
>                         }
> -                       $keyword_string = $this->dbh->quote("%" .
> addcslashes($keyword_string, '%_') . "%");
>
> -                       if ($search_by === 'name') {
> -                               $where_condition = "(Packages.Name LIKE
> $keyword_string)";
> -                       } else if ($search_by === 'name-desc') {
> -                               $where_condition = "(Packages.Name LIKE
> $keyword_string OR ";
> -                               $where_condition .= "Description LIKE
> $keyword_string)";
> +                       if ($this->version >= 6 && $search_by ===
> 'name-desc') {
> +
> +                               // API Version 6 with _by_ `name-desc`,
> create a conjunctive query.
> +                               $where_condition =
> construct_keyword_search($this->dbh,
> +                                       $keyword_string, true, false);
> +
> +                       } else {
> +
> +                               $keyword_string = $this->dbh->quote(
> +                                       "%" . addcslashes($keyword_string,
> '%_') . "%");
> +
> +                               if ($search_by === 'name') {
> +                                       $where_condition = "(Packages.Name
> LIKE $keyword_string)";
> +                               } else if ($search_by === 'name-desc') {
> +                                       $where_condition = "(Packages.Name
> LIKE $keyword_string ";
> +                                       $where_condition .= "OR
> Description LIKE $keyword_string)";
> +                               }
> +
>                         }
>                 } else if ($search_by === 'maintainer') {
>                         if (empty($keyword_string)) {
> diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php
> index 8c915711..83d49b0f 100644
> --- a/web/lib/pkgfuncs.inc.php
> +++ b/web/lib/pkgfuncs.inc.php
> @@ -696,8 +696,10 @@ function pkg_search_page($params, $show_headers=true,
> $SID="") {
>                         $q_where .= "AND (PackageBases.Name LIKE " .
> $dbh->quote($K) . ") ";
>                 }
>                 elseif (isset($params["SeB"]) && $params["SeB"] == "k") {
> -                       /* Search by keywords. */
> -                       $q_where .= construct_keyword_search($dbh,
> $params['K'], false);
> +                       /* Search by name. */
> +                       $q_where .= "AND (";
> +                       $q_where .= construct_keyword_search($dbh,
> $params['K'], false, true);
> +                       $q_where .= ") ";
>                 }
>                 elseif (isset($params["SeB"]) && $params["SeB"] == "N") {
>                         /* Search by name (exact match). */
> @@ -709,7 +711,9 @@ function pkg_search_page($params, $show_headers=true,
> $SID="") {
>                 }
>                 else {
>                         /* Keyword search (default). */
> -                       $q_where .= construct_keyword_search($dbh,
> $params['K'], true);
> +                       $q_where .= "AND (";
> +                       $q_where .= construct_keyword_search($dbh,
> $params['K'], true, true);
> +                       $q_where .= ") ";
>                 }
>         }
>
> @@ -833,10 +837,11 @@ function pkg_search_page($params,
> $show_headers=true, $SID="") {
>   * @param handle $dbh Database handle
>   * @param string $keywords The search term
>   * @param bool $namedesc Search name and description fields
> + * @param bool $keyword Search packages with a matching
> PackageBases.Keyword
>   *
>   * @return string WHERE part of the SQL clause
>   */
> -function construct_keyword_search($dbh, $keywords, $namedesc) {
> +function construct_keyword_search($dbh, $keywords, $namedesc,
> $keyword=false) {
>         $count = 0;
>         $where_part = "";
>         $q_keywords = "";
> @@ -863,11 +868,20 @@ function construct_keyword_search($dbh, $keywords,
> $namedesc) {
>                 $q_keywords .= $op . " (";
>                 if ($namedesc) {
>                         $q_keywords .= "Packages.Name LIKE " .
> $dbh->quote($term) . " OR ";
> -                       $q_keywords .= "Description LIKE " .
> $dbh->quote($term) . " OR ";
> +                       $q_keywords .= "Description LIKE " .
> $dbh->quote($term) . " ";
> +               }
> +               else {
> +                       $q_keywords .= "Packages.Name LIKE " .
> $dbh->quote($term) . " ";
> +               }
> +
> +               if ($keyword) {
> +                       $q_keywords .= "OR EXISTS (SELECT * FROM
> PackageKeywords WHERE ";
> +                       $q_keywords .= "PackageKeywords.PackageBaseID =
> Packages.PackageBaseID AND ";
> +                       $q_keywords .= "PackageKeywords.Keyword LIKE " .
> $dbh->quote($term) . ")) ";
> +               }
> +               else {
> +                       $q_keywords .= ") ";
>                 }
> -               $q_keywords .= "EXISTS (SELECT * FROM PackageKeywords
> WHERE ";
> -               $q_keywords .= "PackageKeywords.PackageBaseID =
> Packages.PackageBaseID AND ";
> -               $q_keywords .= "PackageKeywords.Keyword LIKE " .
> $dbh->quote($term) . ")) ";
>
>                 $count++;
>                 if ($count >= 20) {
> @@ -876,11 +890,7 @@ function construct_keyword_search($dbh, $keywords,
> $namedesc) {
>                 $op = "AND ";
>         }
>
> -       if (!empty($q_keywords)) {
> -               $where_part = "AND (" . $q_keywords . ") ";
> -       }
> -
> -       return $where_part;
> +       return $q_keywords;
>  }
>
>  /**
> --
> 2.20.1
>
>
Lukas Fleischer July 6, 2020, 8:36 p.m. UTC | #2
On Sun, 05 Jul 2020 at 21:19:06, Kevin Morris wrote:
> Newly supported API Version 6 modifies `type=search` for _by_ type
> `name-desc`: it now behaves the same as `name-desc` search through the
> https://aur.archlinux.org/packages/ search page.
> 
> Search for packages containing the literal keyword `blah blah` AND `haha`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
> 
> Search for packages containing the literal keyword `abc 123`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
> 
> The following example searches for packages that contain `blah` AND `abc`:
> https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
> 
> The legacy method still searches for packages that contain `blah abc`:
> https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
> https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
> 
> API Version 6 is currently only considered during a `search` of `name-desc`.
> 
> Note: This change was written as a solution to
> https://bugs.archlinux.org/task/49133.
> 
> PS: + Some spacing issues fixed in comments.
> 
> Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com>
> ---
>  doc/rpc.txt               |  4 ++++
>  web/lib/aurjson.class.php | 33 +++++++++++++++++++++++----------
>  web/lib/pkgfuncs.inc.php  | 36 +++++++++++++++++++++++-------------
>  3 files changed, 50 insertions(+), 23 deletions(-)

Awesome! Pushed to pu with some minor space changes and code
deduplication. Thanks!
Kevin Morris July 6, 2020, 8:39 p.m. UTC | #3
Sweet! Thanks for all your time/help with this, Lukas. I'm growing more
familiar with the codebase, I'll work on reducing the amount of patchsets
I send through for stuff. Sorry about all of them.

On Mon, Jul 6, 2020 at 1:36 PM Lukas Fleischer <lfleischer@archlinux.org>
wrote:

> On Sun, 05 Jul 2020 at 21:19:06, Kevin Morris wrote:
> > Newly supported API Version 6 modifies `type=search` for _by_ type
> > `name-desc`: it now behaves the same as `name-desc` search through the
> > https://aur.archlinux.org/packages/ search page.
> >
> > Search for packages containing the literal keyword `blah blah` AND
> `haha`:
> > https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
> >
> > Search for packages containing the literal keyword `abc 123`:
> > https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
> >
> > The following example searches for packages that contain `blah` AND
> `abc`:
> > https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
> >
> > The legacy method still searches for packages that contain `blah abc`:
> > https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
> > https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
> >
> > API Version 6 is currently only considered during a `search` of
> `name-desc`.
> >
> > Note: This change was written as a solution to
> > https://bugs.archlinux.org/task/49133.
> >
> > PS: + Some spacing issues fixed in comments.
> >
> > Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com>
> > ---
> >  doc/rpc.txt               |  4 ++++
> >  web/lib/aurjson.class.php | 33 +++++++++++++++++++++++----------
> >  web/lib/pkgfuncs.inc.php  | 36 +++++++++++++++++++++++-------------
> >  3 files changed, 50 insertions(+), 23 deletions(-)
>
> Awesome! Pushed to pu with some minor space changes and code
> deduplication. Thanks!
>
Lukas Fleischer July 6, 2020, 8:47 p.m. UTC | #4
On Mon, 06 Jul 2020 at 16:39:59, Kevin Morris wrote:
> Sweet! Thanks for all your time/help with this, Lukas. I'm growing more
> familiar with the codebase, I'll work on reducing the amount of patchsets
> I send through for stuff. Sorry about all of them.

No worries, as long as revisions are clearly marked, such that it's easy
to keep track of what the latest revision is, that's not a problem at
all.

One more tip: You can use `git send-email --annotate` and add a comment
between the "---" separator and the diff stat which eliminates the need
to send a separate email with comments/updates (that message only
appears in the email, not in the applied patch):

> [...]
> > > Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com>
> > > ---

Add text here.

> > >  doc/rpc.txt               |  4 ++++
> > >  web/lib/aurjson.class.php | 33 +++++++++++++++++++++++----------
> > >  web/lib/pkgfuncs.inc.php  | 36 +++++++++++++++++++++++-------------
> > >  3 files changed, 50 insertions(+), 23 deletions(-)

Patch

diff --git a/doc/rpc.txt b/doc/rpc.txt
index 3148ebea..b0f5c4e1 100644
--- a/doc/rpc.txt
+++ b/doc/rpc.txt
@@ -39,6 +39,10 @@  Examples
   `/rpc/?v=5&type=search&by=makedepends&arg=boost`
 `search` with callback::
   `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103`
+`search` with API Version 6 for packages containing `cookie` AND `milk`::
+  `/rpc/?v=6&type=search&arg=cookie%20milk`
+`search` with API Version 6 for packages containing `cookie milk`::
+  `/rpc/?v=6&type=search&arg="cookie milk"`
 `info`::
   `/rpc/?v=5&type=info&arg[]=foobar`
 `info` with multiple packages::
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php
index 0ac586fe..b92e9094 100644
--- a/web/lib/aurjson.class.php
+++ b/web/lib/aurjson.class.php
@@ -1,6 +1,7 @@ 
 <?php
 
 include_once("aur.inc.php");
+include_once("pkgfuncs.inc.php");
 
 /*
  * This class defines a remote interface for fetching data from the AUR using
@@ -80,7 +81,7 @@  class AurJSON {
 		if (isset($http_data['v'])) {
 			$this->version = intval($http_data['v']);
 		}
-		if ($this->version < 1 || $this->version > 5) {
+		if ($this->version < 1 || $this->version > 6) {
 			return $this->json_error('Invalid version specified.');
 		}
 
@@ -140,7 +141,7 @@  class AurJSON {
 	}
 
 	/*
-	 * Check if an IP needs to be  rate limited.
+	 * Check if an IP needs to be rate limited.
 	 *
 	 * @param $ip IP of the current request
 	 *
@@ -192,7 +193,7 @@  class AurJSON {
 		$value = get_cache_value('ratelimit-ws:' . $ip, $status);
 		if (!$status || ($status && $value < $deletion_time)) {
 			if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) &&
-			    set_cache_value('ratelimit:' . $ip, 1, $window_length)) {
+				set_cache_value('ratelimit:' . $ip, 1, $window_length)) {
 				return;
 			}
 		} else {
@@ -370,7 +371,7 @@  class AurJSON {
 		} elseif ($this->version >= 2) {
 			if ($this->version == 2 || $this->version == 3) {
 				$fields = implode(',', self::$fields_v2);
-			} else if ($this->version == 4 || $this->version == 5) {
+			} else if ($this->version >= 4 && $this->version <= 6) {
 				$fields = implode(',', self::$fields_v4);
 			}
 			$query = "SELECT {$fields} " .
@@ -492,13 +493,25 @@  class AurJSON {
 			if (strlen($keyword_string) < 2) {
 				return $this->json_error('Query arg too small.');
 			}
-			$keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%");
 
-			if ($search_by === 'name') {
-				$where_condition = "(Packages.Name LIKE $keyword_string)";
-			} else if ($search_by === 'name-desc') {
-				$where_condition = "(Packages.Name LIKE $keyword_string OR ";
-				$where_condition .= "Description LIKE $keyword_string)";
+			if ($this->version >= 6 && $search_by === 'name-desc') {
+
+				// API Version 6 with _by_ `name-desc`, create a conjunctive query.
+				$where_condition = construct_keyword_search($this->dbh,
+					$keyword_string, true, false);
+
+			} else {
+
+				$keyword_string = $this->dbh->quote(
+					"%" . addcslashes($keyword_string, '%_') . "%");
+
+				if ($search_by === 'name') {
+					$where_condition = "(Packages.Name LIKE $keyword_string)";
+				} else if ($search_by === 'name-desc') {
+					$where_condition = "(Packages.Name LIKE $keyword_string ";
+					$where_condition .= "OR Description LIKE $keyword_string)";
+				}
+
 			}
 		} else if ($search_by === 'maintainer') {
 			if (empty($keyword_string)) {
diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php
index 8c915711..83d49b0f 100644
--- a/web/lib/pkgfuncs.inc.php
+++ b/web/lib/pkgfuncs.inc.php
@@ -696,8 +696,10 @@  function pkg_search_page($params, $show_headers=true, $SID="") {
 			$q_where .= "AND (PackageBases.Name LIKE " . $dbh->quote($K) . ") ";
 		}
 		elseif (isset($params["SeB"]) && $params["SeB"] == "k") {
-			/* Search by keywords. */
-			$q_where .= construct_keyword_search($dbh, $params['K'], false);
+			/* Search by name. */
+			$q_where .= "AND (";
+			$q_where .= construct_keyword_search($dbh, $params['K'], false, true);
+			$q_where .= ") ";
 		}
 		elseif (isset($params["SeB"]) && $params["SeB"] == "N") {
 			/* Search by name (exact match). */
@@ -709,7 +711,9 @@  function pkg_search_page($params, $show_headers=true, $SID="") {
 		}
 		else {
 			/* Keyword search (default). */
-			$q_where .= construct_keyword_search($dbh, $params['K'], true);
+			$q_where .= "AND (";
+			$q_where .= construct_keyword_search($dbh, $params['K'], true, true);
+			$q_where .= ") ";
 		}
 	}
 
@@ -833,10 +837,11 @@  function pkg_search_page($params, $show_headers=true, $SID="") {
  * @param handle $dbh Database handle
  * @param string $keywords The search term
  * @param bool $namedesc Search name and description fields
+ * @param bool $keyword Search packages with a matching PackageBases.Keyword
  *
  * @return string WHERE part of the SQL clause
  */
-function construct_keyword_search($dbh, $keywords, $namedesc) {
+function construct_keyword_search($dbh, $keywords, $namedesc, $keyword=false) {
 	$count = 0;
 	$where_part = "";
 	$q_keywords = "";
@@ -863,11 +868,20 @@  function construct_keyword_search($dbh, $keywords, $namedesc) {
 		$q_keywords .= $op . " (";
 		if ($namedesc) {
 			$q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " OR ";
-			$q_keywords .= "Description LIKE " . $dbh->quote($term) . " OR ";
+			$q_keywords .= "Description LIKE " . $dbh->quote($term) . " ";
+		}
+		else {
+			$q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " ";
+		}
+
+		if ($keyword) {
+			$q_keywords .= "OR EXISTS (SELECT * FROM PackageKeywords WHERE ";
+			$q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND ";
+			$q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) ";
+		}
+		else {
+			$q_keywords .= ") ";
 		}
-		$q_keywords .= "EXISTS (SELECT * FROM PackageKeywords WHERE ";
-		$q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND ";
-		$q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) ";
 
 		$count++;
 		if ($count >= 20) {
@@ -876,11 +890,7 @@  function construct_keyword_search($dbh, $keywords, $namedesc) {
 		$op = "AND ";
 	}
 
-	if (!empty($q_keywords)) {
-		$where_part = "AND (" . $q_keywords . ") ";
-	}
-
-	return $where_part;
+	return $q_keywords;
 }
 
 /**